author: Dimitry Andric <dim@FreeBSD.org>  2011-02-20 12:57:14 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2011-02-20 12:57:14 +0000
commit: cf099d11218cb6f6c5cce947d6738e347f07fb12 (patch)
tree: d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/CodeGen
parent: 49011b52fcba02a6051957b84705159f52fae4e4 (diff)
Diffstat (limited to 'lib/CodeGen')
139 files changed, 17732 insertions, 11835 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5a634d6ccb01..b520d8fcedc0 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -155,16 +155,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; + for (const unsigned *Alias = TRI->getOverlaps(*I); + unsigned Reg = *Alias; ++Alias) { + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; } } } @@ -176,16 +171,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; + for (const unsigned *Alias = TRI->getOverlaps(*I); + unsigned Reg = *Alias; ++Alias) { + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; } } @@ -197,12 +187,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (const unsigned *Alias = TRI->getOverlaps(Reg); + unsigned AliasReg = *Alias; ++Alias) { State->UnionGroups(AliasReg, 0); KillIndices[AliasReg] = BB->size(); DefIndices[AliasReg] = ~0u; @@ -435,12 +421,9 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - DefIndices[Reg] = Count; - for (const unsigned *Alias = TRI->getAliasSet(Reg); - *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (const unsigned *Alias = TRI->getOverlaps(Reg); + unsigned AliasReg = *Alias; ++Alias) DefIndices[AliasReg] = Count; - } } } diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp new file mode 100644 index 000000000000..20c7625f3253 --- /dev/null +++ b/lib/CodeGen/AllocationOrder.cpp @@ -0,0 +1,68 @@ +//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. 
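Aside (not part of the commit): the AggressiveAntiDepBreaker hunks above fold the old "update Reg, then repeat for every alias" pattern into a single loop over TRI->getOverlaps(Reg), a zero-terminated list that includes the register itself. A toy stand-in showing just the loop shape, with hypothetical register numbers and helper names:

```cpp
#include <cstdio>

// Zero-terminated overlap list containing the register itself plus its
// aliases (values here are made up, not real target register numbers).
static const unsigned OverlapsOfEAX[] = {/*EAX*/ 1, /*AX*/ 2, /*AL*/ 3,
                                         /*AH*/ 4, 0};

static void markLiveOut(const unsigned *Overlaps, unsigned KillIdx,
                        unsigned *KillIndices, unsigned *DefIndices) {
  // One loop now covers the register and all of its aliases.
  for (const unsigned *I = Overlaps; unsigned Reg = *I; ++I) {
    KillIndices[Reg] = KillIdx;
    DefIndices[Reg] = ~0u;
  }
}

int main() {
  unsigned Kill[8] = {0}, Def[8] = {0};
  markLiveOut(OverlapsOfEAX, 42, Kill, Def);
  std::printf("%u %u\n", Kill[1], Kill[4]); // 42 42
  return 0;
}
```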
The AllocationOrder class encapsulates all of that. +// +//===----------------------------------------------------------------------===// + +#include "AllocationOrder.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +// Compare VirtRegMap::getRegAllocPref(). +AllocationOrder::AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const BitVector &ReservedRegs) + : Pos(0), Reserved(ReservedRegs) { + const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); + std::pair<unsigned, unsigned> HintPair = + VRM.getRegInfo().getRegAllocationHint(VirtReg); + + // HintPair.second is a register, phys or virt. + Hint = HintPair.second; + + // Translate to physreg, or 0 if not assigned yet. + if (TargetRegisterInfo::isVirtualRegister(Hint)) + Hint = VRM.getPhys(Hint); + + // The remaining allocation order may depend on the hint. + tie(Begin, End) = VRM.getTargetRegInfo() + .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); + + // Target-dependent hints require resolution. + if (HintPair.first) + Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint, + VRM.getMachineFunction()); + + // The hint must be a valid physreg for allocation. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || ReservedRegs.test(Hint))) + Hint = 0; +} + +unsigned AllocationOrder::next() { + // First take the hint. + if (!Pos) { + Pos = Begin; + if (Hint) + return Hint; + } + // Then look at the order from TRI. + while(Pos != End) { + unsigned Reg = *Pos++; + if (Reg != Hint && !Reserved.test(Reg)) + return Reg; + } + return 0; +} diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h new file mode 100644 index 000000000000..3db4b6925fca --- /dev/null +++ b/lib/CodeGen/AllocationOrder.h @@ -0,0 +1,54 @@ +//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. The AllocationOrder class encapsulates all of that. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H +#define LLVM_CODEGEN_ALLOCATIONORDER_H + +namespace llvm { + +class BitVector; +class VirtRegMap; + +class AllocationOrder { + const unsigned *Begin; + const unsigned *End; + const unsigned *Pos; + const BitVector &Reserved; + unsigned Hint; +public: + + /// AllocationOrder - Create a new AllocationOrder for VirtReg. + /// @param VirtReg Virtual register to allocate for. + /// @param VRM Virtual register map for function. + /// @param ReservedRegs Set of reserved registers as returned by + /// TargetRegisterInfo::getReservedRegs(). + AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const BitVector &ReservedRegs); + + /// next - Return the next physical register in the allocation order, or 0. + /// It is safe to call next again after it returned 0. + /// It will keep returning 0 until rewind() is called. + unsigned next(); + + /// rewind - Start over from the beginning. 
+ void rewind() { Pos = 0; } + +}; + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index e3dd646c952e..36638c36de67 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -19,6 +19,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -30,7 +31,7 @@ using namespace llvm; /// of insertvalue or extractvalue indices that identify a member, return /// the linearized index of the start of the member. /// -unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, +unsigned llvm::ComputeLinearIndex(const Type *Ty, const unsigned *Indices, const unsigned *IndicesEnd, unsigned CurIndex) { @@ -45,8 +46,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, EE = STy->element_end(); EI != EE; ++EI) { if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex); } return CurIndex; } @@ -55,8 +56,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, const Type *EltTy = ATy->getElementType(); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { if (Indices && *Indices == i) - return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex); } return CurIndex; } @@ -125,7 +126,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) { /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being /// processed uses a memory 'm' constraint. bool -llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, +llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, const TargetLowering &TLI) { for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { InlineAsm::ConstraintInfo &CI = CInfos[i]; @@ -283,3 +284,20 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, return true; } +bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + const TargetLowering &TLI) { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if (CallerRetAttr & ~Attribute::NoAlias) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Check if the only use is a function return node. 
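Aside (not part of the commit): the new AllocationOrder class above tries the hint register first, then walks the target's allocation order while skipping reserved registers and the hint, and next() keeps returning 0 after the order is exhausted until rewind() is called. A minimal standalone sketch of that contract using plain containers (all names here are hypothetical stand-ins, not the LLVM API):

```cpp
#include <cstdio>
#include <set>
#include <vector>

class SimpleAllocationOrder {
  std::vector<unsigned> Order;  // target-provided allocation order
  std::set<unsigned> Reserved;  // reserved physical registers
  unsigned Hint;                // preferred register, 0 if none
  size_t Pos;                   // 0 = "hint not taken yet"
public:
  SimpleAllocationOrder(std::vector<unsigned> O, std::set<unsigned> R,
                        unsigned H)
      : Order(std::move(O)), Reserved(std::move(R)), Hint(H), Pos(0) {}

  // Return the next candidate register, or 0 when exhausted.
  unsigned next() {
    if (Pos == 0) {
      Pos = 1;                             // from now on index Order[Pos-1]
      if (Hint && !Reserved.count(Hint))
        return Hint;                       // try the hint first
    }
    while (Pos <= Order.size()) {
      unsigned Reg = Order[Pos - 1];
      ++Pos;
      if (Reg != Hint && !Reserved.count(Reg))
        return Reg;
    }
    return 0;                              // stays 0 until rewind()
  }

  void rewind() { Pos = 0; }               // start over from the beginning
};

int main() {
  SimpleAllocationOrder AO({1, 2, 3, 4}, /*Reserved=*/{3}, /*Hint=*/2);
  for (unsigned R = AO.next(); R; R = AO.next())
    std::printf("candidate r%u\n", R);     // prints r2, r1, r4
  return 0;
}
```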
+ return TLI.isUsedByReturnOnly(Node); +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d358ab20ffc5..43e8990a9da1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -38,6 +38,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" @@ -178,16 +179,24 @@ bool AsmPrinter::doInitialization(Module &M) { if (!M.getModuleInlineAsm().empty()) { OutStreamer.AddComment("Start of file scope inline assembly"); OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/); + EmitInlineAsm(M.getModuleInlineAsm()+"\n"); OutStreamer.AddComment("End of file scope inline assembly"); OutStreamer.AddBlankLine(); } if (MAI->doesSupportDebugInformation()) DD = new DwarfDebug(this, &M); - + if (MAI->doesSupportExceptionHandling()) - DE = new DwarfException(this); + switch (MAI->getExceptionHandlingType()) { + default: + case ExceptionHandling::DwarfTable: + DE = new DwarfTableException(this); + break; + case ExceptionHandling::DwarfCFI: + DE = new DwarfCFIException(this); + break; + } return false; } @@ -282,8 +291,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common symbols. if (GVKind.isCommon()) { + unsigned Align = 1 << AlignLog; + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; + // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog); + OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } @@ -301,11 +314,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitLocalCommonSymbol(GVSym, Size); return; } + + unsigned Align = 1 << AlignLog; + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; // .local _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog); + OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } @@ -327,6 +344,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle thread local data for mach-o which requires us to output an // additional structure of data and mangle the original symbol so that we // can reference it later. + // + // TODO: This should become an "emit thread local global" method on TLOF. + // All of this macho specific stuff should be sunk down into TLOFMachO and + // stuff like "TLSExtraDataSection" should no longer be part of the parent + // TLOF class. This will also make it more obvious that stuff like + // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho + // specific code. 
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = @@ -623,7 +647,7 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginScope(II); + DD->beginInstruction(II); } if (isVerbose()) @@ -657,7 +681,7 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endScope(II); + DD->endInstruction(II); } } } @@ -729,7 +753,20 @@ bool AsmPrinter::doFinalization(Module &M) { for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobalVariable(I); - + + // Emit visibility info for declarations + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const Function &F = *I; + if (!F.isDeclaration()) + continue; + GlobalValue::VisibilityTypes V = F.getVisibility(); + if (V == GlobalValue::DefaultVisibility) + continue; + + MCSymbol *Name = Mang->getSymbol(&F); + EmitVisibility(Name, V); + } + // Finalize debug and EH information. if (DE) { { @@ -905,14 +942,6 @@ void AsmPrinter::EmitConstantPool() { const Type *Ty = CPE.getType(); Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); - - // Emit the label with a comment on it. - if (isVerbose()) { - OutStreamer.GetCommentOS() << "constant pool "; - WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(), - MF->getFunction()->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } OutStreamer.EmitLabel(GetCPISymbol(CPI)); if (CPE.isMachineConstantPoolEntry()) @@ -983,7 +1012,7 @@ void AsmPrinter::EmitJumpTableInfo() { } } - // On some targets (e.g. Darwin) we want to emit two consequtive labels + // On some targets (e.g. Darwin) we want to emit two consecutive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. @@ -1004,6 +1033,7 @@ void AsmPrinter::EmitJumpTableInfo() { void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned UID) const { + assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); const MCExpr *Value = 0; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index ce4519c541e3..98a1bf2f1ce4 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -19,7 +19,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -36,9 +36,8 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) { - // FIXME: MCize. 
- OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value)); + if (MAI->hasLEB128()) { + OutStreamer.EmitSLEB128IntValue(Value); return; } @@ -60,10 +59,10 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - - if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) { - // FIXME: MCize. - OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value)); + + // FIXME: Should we add a PadTo option to the streamer? + if (MAI->hasLEB128() && PadTo == 0) { + OutStreamer.EmitULEB128IntValue(Value); return; } @@ -157,7 +156,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const MCExpr *Exp = TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); - OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0); + OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ @@ -215,8 +214,8 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, const TargetRegisterInfo *RI = TM.getRegisterInfo(); int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameInfo()->getStackGrowthDirection() != - TargetFrameInfo::StackGrowsUp) + if (TM.getFrameLowering()->getStackGrowthDirection() != + TargetFrameLowering::StackGrowsUp) stackGrowth *= -1; for (unsigned i = 0, N = Moves.size(); i < N; ++i) { @@ -277,3 +276,43 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, } } } + +/// EmitFrameMoves - Emit frame instructions to describe the layout of the +/// frame. +void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + + int stackGrowth = TM.getTargetData()->getPointerSize(); + if (TM.getFrameLowering()->getStackGrowthDirection() != + TargetFrameLowering::StackGrowsUp) + stackGrowth *= -1; + + for (unsigned i = 0, N = Moves.size(); i < N; ++i) { + const MachineMove &Move = Moves[i]; + MCSymbol *Label = Move.getLabel(); + // Throw out move if the label is invalid. + if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // If advancing cfa. 
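Aside (not part of the commit): the EmitSLEB128/EmitULEB128 hunks above stop printing raw `.sleb128`/`.uleb128` text and let the streamer encode the values instead. For reference, a minimal standalone encoder for both LEB128 forms (hypothetical helper names, not the MCStreamer API):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Unsigned LEB128: 7 bits per byte, high bit set on all but the last byte.
static std::vector<uint8_t> encodeULEB128(uint64_t V) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V) Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
  return Out;
}

// Signed LEB128: stop once the remaining bits are all copies of the sign bit.
static std::vector<uint8_t> encodeSLEB128(int64_t V) {
  std::vector<uint8_t> Out;
  bool More = true;
  while (More) {
    uint8_t Byte = V & 0x7f;
    V >>= 7;                               // arithmetic shift on int64_t
    if ((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)))
      More = false;
    else
      Byte |= 0x80;
    Out.push_back(Byte);
  }
  return Out;
}

int main() {
  for (uint8_t B : encodeULEB128(624485)) std::printf("%02x ", B); // e5 8e 26
  std::printf("\n");
  for (uint8_t B : encodeSLEB128(-2)) std::printf("%02x ", B);     // 7e
  std::printf("\n");
  return 0;
}
```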
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); + } else { + assert("Machine move not supported yet"); + // Reg + Offset + } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); + } + } +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index df0316814c08..c6166e2365a5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -34,15 +34,47 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +namespace { + struct SrcMgrDiagInfo { + const MDNode *LocInfo; + LLVMContext::InlineAsmDiagHandlerTy DiagHandler; + void *DiagContext; + }; +} + +/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo +/// struct above. +static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { + SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); + assert(DiagInfo && "Diagnostic context not passed down?"); + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. + unsigned LocCookie = 0; + if (const MDNode *LocInfo = DiagInfo->LocInfo) { + unsigned ErrorLine = Diag.getLineNo()-1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); +} + /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { assert(!Str.empty() && "Can't emit empty inline asm block"); - + // Remember if the buffer is nul terminated or not so we can avoid a copy. bool isNullTerminated = Str.back() == 0; if (isNullTerminated) Str = Str.substr(0, Str.size()-1); - + // If the output streamer is actually a .s file, just emit the blob textually. // This is useful in case the asm parser doesn't handle something but the // system assembler does. @@ -50,18 +82,23 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { OutStreamer.EmitRawText(Str); return; } - + SourceMgr SrcMgr; - + SrcMgrDiagInfo DiagInfo; + // If the current LLVMContext has an inline asm handler, set it in SourceMgr. LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; - if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) { - SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler, - LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie); + if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { + // If the source manager has an issue, we arrange for SrcMgrDiagHandler + // to be invoked, getting DiagInfo passed into it. 
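Aside (not part of the commit): SrcMgrDiagHandler above maps an assembler diagnostic back to a front-end location cookie by indexing the !srcloc metadata with the diagnostic's line number, clamping out-of-range lines to the first entry. A standalone sketch of that lookup with plain containers (names hypothetical):

```cpp
#include <cstdio>
#include <vector>

// The !srcloc metadata carries one location cookie per line of the inline
// asm blob; diagnostics from the integrated assembler are 1-based.
static unsigned cookieForDiagnostic(const std::vector<unsigned> &LocCookies,
                                    unsigned DiagLineNo) {
  if (LocCookies.empty())
    return 0;                              // no metadata: no cookie
  unsigned ErrorLine = DiagLineNo - 1;     // convert to a 0-based index
  if (ErrorLine >= LocCookies.size())
    ErrorLine = 0;                         // clamp to the first entry
  return LocCookies[ErrorLine];
}

int main() {
  std::vector<unsigned> Cookies = {101, 102, 103};        // per asm line
  std::printf("%u\n", cookieForDiagnostic(Cookies, 2));   // 102
  std::printf("%u\n", cookieForDiagnostic(Cookies, 9));   // out of range: 101
  return 0;
}
```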
+ DiagInfo.LocInfo = LocMDNode; + DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); + DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); + SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo); HasDiagHandler = true; } - + MemoryBuffer *Buffer; if (isNullTerminated) Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>"); @@ -70,7 +107,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - + OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI)); @@ -92,15 +129,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { /// instruction that is an inline asm. void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); - + unsigned NumOperands = MI->getNumOperands(); - + // Count the number of register definitions to find the asm string. unsigned NumDefs = 0; for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); ++NumDefs) assert(NumDefs != NumOperands-2 && "No asm string?"); - + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. @@ -128,22 +165,23 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. unsigned LocCookie = 0; + const MDNode *LocMD = 0; for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata()) - if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata()) - if (SrcLoc->getNumOperands() != 0) - if (const ConstantInt *CI = - dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } } - + // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. SmallString<256> StringData; raw_svector_ostream OS(StringData); - + OS << '\t'; // The variant of the current asmprinter. @@ -151,7 +189,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. - + while (*LastEmitted) { switch (*LastEmitted) { default: { @@ -199,18 +237,18 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { ++LastEmitted; // consume ')' character. if (CurVariant == -1) OS << '}'; // this is gcc's behavior for } outside a variant - else + else CurVariant = -1; break; } if (Done) break; - + bool HasCurlyBraces = false; if (*LastEmitted == '{') { // ${variable} ++LastEmitted; // Consume '{' character. HasCurlyBraces = true; } - + // If we have ${:foo}, then this is not a real operand reference, it is a // "magic" string reference, just like in .td files. Arrange to call // PrintSpecial. 
@@ -221,25 +259,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (StrEnd == 0) report_fatal_error("Unterminated ${:foo} operand in inline asm" " string: '" + Twine(AsmStr) + "'"); - + std::string Val(StrStart, StrEnd); PrintSpecial(MI, OS, Val.c_str()); LastEmitted = StrEnd+1; break; } - + const char *IDStart = LastEmitted; const char *IDEnd = IDStart; - while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; - + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) report_fatal_error("Bad $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); LastEmitted = IDEnd; - + char Modifier[2] = { 0, 0 }; - + if (HasCurlyBraces) { // If we have curly braces, check for a modifier character. This // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. @@ -248,25 +286,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == 0) report_fatal_error("Bad ${:} expression in inline asm string: '" + Twine(AsmStr) + "'"); - + Modifier[0] = *LastEmitted; ++LastEmitted; // Consume modifier character. } - + if (*LastEmitted != '}') report_fatal_error("Bad ${} expression in inline asm string: '" + Twine(AsmStr) + "'"); ++LastEmitted; // Consume '}' character. } - + if (Val >= NumOperands-1) report_fatal_error("Invalid $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); - + // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = 2; + unsigned OpNo = InlineAsm::MIOp_FirstOperand; bool Error = false; @@ -310,8 +348,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), LocCookie); - + EmitInlineAsm(OS.str(), LocMD); + // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. if (OutStreamer.hasRawTextSupport()) @@ -335,7 +373,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, } else if (!strcmp(Code, "uid")) { // Comparing the address of MI isn't sufficient, because machineinstrs may // be allocated to the same address across functions. - + // If this is a new LastFn instruction, bump the counter. 
if (LastMI != MI || LastFn != getFunctionNumber()) { ++Counter; @@ -349,7 +387,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; report_fatal_error(Msg.str()); - } + } } /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index ca8b8436c11f..306efade7d92 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -3,9 +3,10 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp + DwarfCFIException.cpp DwarfDebug.cpp DwarfException.cpp + DwarfTableException.cpp OcamlGCPrinter.cpp ) -target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp new file mode 100644 index 000000000000..68be2eed8f0e --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -0,0 +1,138 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +DwarfCFIException::DwarfCFIException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + {} + +DwarfCFIException::~DwarfCFIException() {} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfCFIException::EndModule() { + if (!Asm->MAI->isExceptionHandlingDwarf()) + return; + + if (!shouldEmitTableModule) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + // Begin eh frame section. 
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + // Emit references to all used personality functions + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (size_t i = 0, e = Personalities.size(); i != e; ++i) { + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); + Asm->EmitReference(Personalities[i], PerEncoding); + } +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void DwarfCFIException::BeginFunction(const MachineFunction *MF) { + shouldEmitTable = shouldEmitMoves = false; + + // If any landing pads survive, we need an EH table. + shouldEmitTable = !MMI->getLandingPads().empty(); + + // See if we need frame move info. + shouldEmitMoves = + !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + + shouldEmitTableModule |= shouldEmitTable; + + if (shouldEmitMoves) { + const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate locations of general callee saved registers in frame. + std::vector<MachineMove> Moves; + TFL->getInitialFrameState(Moves); + Asm->EmitCFIFrameMoves(Moves); + Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); + } + + if (!shouldEmitTable) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Provide LSDA information. + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + if (LSDAEncoding != dwarf::DW_EH_PE_omit) + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); + + // Indicate personality routine, if any. + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + if (PerEncoding != dwarf::DW_EH_PE_omit && + MMI->getPersonalities()[MMI->getPersonalityIndex()]) + Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", + MMI->getPersonalityIndex()), + PerEncoding); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfCFIException::EndFunction() { + if (!shouldEmitMoves && !shouldEmitTable) return; + + if (shouldEmitMoves) + Asm->OutStreamer.EmitCFIEndProc(); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Map all labels and get rid of any dead landing pads. 
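Aside (not part of the commit): BeginFunction above makes two independent decisions, whether the function needs an exception table (any surviving landing pads) and whether it needs frame-move/CFI information (it may throw, or unwind tables are mandatory). A small sketch of just that decision logic (types and names hypothetical):

```cpp
#include <cstdio>

struct FunctionEHInfo {
  bool HasLandingPads;         // any landing pads survived codegen?
  bool MayThrow;               // i.e. !doesNotThrow()
  bool UnwindTablesMandatory;  // -funwind-tables style requirement
};

// Mirrors the shouldEmitTable / shouldEmitMoves split in BeginFunction.
static void planEH(const FunctionEHInfo &F, bool &EmitTable, bool &EmitMoves) {
  EmitTable = F.HasLandingPads;                       // needs an LSDA
  EmitMoves = F.MayThrow || F.UnwindTablesMandatory;  // needs CFI directives
}

int main() {
  bool Table, Moves;
  planEH({false, true, false}, Table, Moves);
  std::printf("table=%d moves=%d\n", Table, Moves);   // table=0 moves=1
  return 0;
}
```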
+ MMI->TidyLandingPads(); + + if (shouldEmitTable) + EmitExceptionTable(); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c886a5ecc615..5106d5778c29 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -16,6 +16,7 @@ #include "DIE.h" #include "llvm/Constants.h" #include "llvm/Module.h" +#include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -24,12 +25,13 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" @@ -38,7 +40,7 @@ #include "llvm/Support/ValueHandle.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Timer.h" -#include "llvm/System/Path.h" +#include "llvm/Support/Path.h" using namespace llvm; static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, @@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absense of debug location information explicit."), cl::init(false)); +#ifndef NDEBUG +STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number"); +#endif + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -507,8 +513,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { return; unsigned Line = V.getLineNumber(); - unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(), - V.getContext().getFilename()); + if (Line == 0) + return; + unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -522,8 +529,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { return; unsigned Line = G.getLineNumber(); - unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(), - G.getContext().getFilename()); + if (Line == 0) + return; + unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -542,8 +550,7 @@ void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { unsigned Line = SP.getLineNumber(); if (!SP.getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(SP.getDirectory(), - SP.getFilename()); + unsigned FileID = GetOrCreateSourceID(SP.getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -557,10 +564,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (!Ty.getContext().Verify()) + if (Line == 0 || !Ty.getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(), - Ty.getContext().getFilename()); + unsigned FileID = GetOrCreateSourceID(Ty.getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, 
FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -574,10 +580,11 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { return; unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; StringRef FN = NS.getFilename(); - StringRef Dir = NS.getDirectory(); - unsigned FileID = GetOrCreateSourceID(Dir, FN); + unsigned FileID = GetOrCreateSourceID(FN); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -588,8 +595,8 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { MachineLocation Location; unsigned FrameReg; - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); Location.set(FrameReg, Offset); if (DV->variableHasComplexAddress()) @@ -620,8 +627,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, if (Reg < 32) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); } else { - Reg = Reg - dwarf::DW_OP_reg0; - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { @@ -760,8 +766,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, if (Reg < 32) addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); else { - Reg = Reg - dwarf::DW_OP_reg0; - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { @@ -812,6 +817,15 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + if (RI->getFrameRegister(*Asm->MF) == Location.getReg() + && Location.getOffset()) { + // If variable offset is based in frame register then use fbreg. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + addBlock(Die, Attribute, 0, Block); + return; + } + if (Location.isReg()) { if (Reg < 32) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); @@ -834,35 +848,28 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, } /// addRegisterAddress - Add register location entry in variable DIE. -bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS, - const MachineOperand &MO) { +bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { assert (MO.isReg() && "Invalid machine operand!"); if (!MO.getReg()) return false; MachineLocation Location; Location.set(MO.getReg()); addAddress(Die, dwarf::DW_AT_location, Location); - if (VS) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } /// addConstantValue - Add constant value entry in variable DIE. 
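Aside (not part of the commit): the addComplexAddress/addBlockByrefAddress hunks above fix register locations for DWARF register numbers of 32 and above: DW_OP_reg0 through DW_OP_reg31 encode the register in the opcode itself, while larger numbers need DW_OP_regx followed by a ULEB128 operand. A hedged sketch of that choice (opcode values per the DWARF spec; helper names hypothetical):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// DWARF expression opcodes (DWARF v3/v4 values).
enum : uint8_t {
  DW_OP_reg0 = 0x50,   // ... DW_OP_reg31 = 0x6f
  DW_OP_regx = 0x90    // followed by a ULEB128 register number
};

// Append a "value lives in register Reg" location expression to Expr.
static void emitRegLocation(std::vector<uint8_t> &Expr, unsigned Reg) {
  if (Reg < 32) {
    Expr.push_back(DW_OP_reg0 + Reg);      // compact single-byte form
    return;
  }
  Expr.push_back(DW_OP_regx);
  do {                                     // ULEB128 operand, inlined
    uint8_t Byte = Reg & 0x7f;
    Reg >>= 7;
    if (Reg) Byte |= 0x80;
    Expr.push_back(Byte);
  } while (Reg);
}

int main() {
  std::vector<uint8_t> A, B;
  emitRegLocation(A, 5);    // -> 0x55 (DW_OP_reg5)
  emitRegLocation(B, 33);   // -> 0x90 0x21 (DW_OP_regx, 33)
  std::printf("%02x | %02x %02x\n", A[0], B[0], B[1]);
  return 0;
}
```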
-bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS, - const MachineOperand &MO) { +bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { assert (MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); unsigned Imm = MO.getImm(); addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - if (VS) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, - const MachineOperand &MO) { +bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -883,11 +890,42 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - if (VS) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } +/// addConstantValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + if (CI->getBitWidth() <= 64) { + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} /// addToContextOwner - Add Die into the list of its context owner's children. void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { @@ -898,8 +936,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context), - /*MakeDecl=*/false); + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); @@ -1033,16 +1070,23 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIDescriptor RTy = Elements.getElement(0); addType(&Buffer, DIType(RTy)); - // Add prototype flag. - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - + bool isPrototyped = true; // Add arguments. 
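Aside (not part of the commit): the new addConstantValue(ConstantInt*) overload above emits integers wider than 64 bits as a block of raw bytes, walking the value forward or backward depending on the target's byte order. A standalone sketch of that forward/backward walk, taking the value as least-significant-first 64-bit words (all names hypothetical):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Serialize a multi-word integer (least-significant word first) to bytes in
// the target's byte order.
static std::vector<uint8_t> serializeWords(const std::vector<uint64_t> &Words,
                                           bool TargetLittleEndian) {
  // Flatten to little-endian bytes first.
  std::vector<uint8_t> LE;
  for (uint64_t W : Words)
    for (int i = 0; i < 8; ++i)
      LE.push_back(uint8_t(W >> (8 * i)));

  // Walk forward for little-endian targets, backward for big-endian ones,
  // mirroring the Start/Stop/Incr loop in the patch.
  std::vector<uint8_t> Out;
  int NumBytes = int(LE.size());
  int Incr  = TargetLittleEndian ? 1 : -1;
  int Start = TargetLittleEndian ? 0 : NumBytes - 1;
  int Stop  = TargetLittleEndian ? NumBytes : -1;
  for (; Start != Stop; Start += Incr)
    Out.push_back(LE[Start]);
  return Out;
}

int main() {
  // A 128-bit value: low word 0x08090a0b0c0d0e0f, high word 0x0001020304050607.
  std::vector<uint64_t> Words = {0x08090a0b0c0d0e0fULL, 0x0001020304050607ULL};
  for (uint8_t B : serializeWords(Words, /*TargetLittleEndian=*/false))
    std::printf("%02x", B);   // 000102030405060708090a0b0c0d0e0f
  std::printf("\n");
  return 0;
}
```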
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); } break; case dwarf::DW_TAG_structure_type: @@ -1060,8 +1104,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { for (unsigned i = 0; i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; - if (Element.isSubprogram()) + if (Element.isSubprogram()) { + DISubprogram SP(Element); ElemDie = createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); @@ -1094,6 +1151,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIDescriptor Context = CTy.getContext(); addToContextOwner(&Buffer, Context); } + + if (Tag == dwarf::DW_TAG_class_type) { + DIArray TParams = CTy.getTemplateParams(); + unsigned N = TParams.getNumElements(); + // Add template parameters. + for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + } break; } default: @@ -1124,6 +1196,38 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + CompileUnit *TypeCU = getCompileUnit(TP); + DIE *ParamDIE = TypeCU->getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. +DIE * +DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { + CompileUnit *TVCU = getCompileUnit(TPV); + DIE *ParamDIE = TVCU->getDIE(TPV); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); + addType(ParamDIE, TPV.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + TPV.getValue()); + return ParamDIE; +} + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. 
void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ int64_t L = SR.getLo(); @@ -1258,7 +1362,8 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { else if (DT.isPrivate()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_private); - else if (DT.getTag() == dwarf::DW_TAG_inheritance) + // Otherwise C++ member and base classes are considered public. + else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_public); if (DT.isVirtual()) @@ -1268,7 +1373,7 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { } /// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { +DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); DIE *SPDie = SPCU->getDIE(SP); if (SPDie) @@ -1286,10 +1391,7 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { addSourceLine(SPDie, SP); - // Add prototyped tag, if C or ObjC. - unsigned Lang = SP.getCompileUnit().getLanguage(); - if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || - Lang == dwarf::DW_LANG_ObjC) + if (SP.isPrototyped()) addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. @@ -1307,13 +1409,13 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); + addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } - if (MakeDecl || !SP.isDefinition()) { + if (!SP.isDefinition()) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. Do not add arguments for subprogram definition. 
They will @@ -1603,6 +1705,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + else if (DIVariable(DV->getVariable()).isArtificial()) + addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1625,7 +1729,6 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { DbgVariableToDbgInstMap.find(DV); if (DVI != DbgVariableToDbgInstMap.end()) { const MachineInstr *DVInsn = DVI->second; - const MCSymbol *DVLabel = findVariableLabel(DV); bool updated = false; // FIXME : Handle getNumOperands != 3 if (DVInsn->getNumOperands() == 3) { @@ -1637,20 +1740,17 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); updated = true; } else - updated = addRegisterAddress(VariableDie, DVLabel, RegOp); + updated = addRegisterAddress(VariableDie, RegOp); } else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isFPImm()) updated = - addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); } else { MachineLocation Location = Asm->getDebugValueLocation(DVInsn); if (Location.getReg()) { addAddress(VariableDie, dwarf::DW_AT_location, Location); - if (DVLabel) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - DVLabel); updated = true; } } @@ -1700,6 +1800,16 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; + SmallVector <DIE *, 8> Children; + // Collect lexical scope childrens first. + const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) + if (DIE *Variable = constructVariableDIE(Variables[i], Scope)) + Children.push_back(Variable); + const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) + if (DIE *Nested = constructScopeDIE(Scopes[j])) + Children.push_back(Nested); DIScope DS(Scope->getScopeNode()); DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) @@ -1715,26 +1825,19 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { else ScopeDIE = updateSubprogramScopeDIE(DS); } - else + else { + // There is no need to emit empty lexical block DIE. + if (Children.empty()) + return NULL; ScopeDIE = constructLexicalScopeDIE(Scope); - if (!ScopeDIE) return NULL; - - // Add variables to scope. - const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); - if (VariableDIE) - ScopeDIE->addChild(VariableDIE); } + + if (!ScopeDIE) return NULL; - // Add nested scopes. - const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); - for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { - // Define the Scope debug information entry. 
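Aside (not part of the commit): constructScopeDIE above now builds all variable and nested-scope children before creating the scope DIE itself, so a lexical block that ends up with no children can simply be dropped. A standalone sketch of that "collect children first, skip empty blocks" shape with toy types (all names hypothetical):

```cpp
#include <memory>
#include <string>
#include <vector>

struct DIE {
  std::string Tag;
  std::vector<std::unique_ptr<DIE>> Children;
};

struct ScopeNode {
  bool IsLexicalBlock;                  // false for the subprogram scope
  std::vector<std::string> Variables;   // variables declared in this scope
  std::vector<ScopeNode> Children;      // nested scopes
};

static std::unique_ptr<DIE> makeDIE(const std::string &Tag) {
  auto D = std::make_unique<DIE>();
  D->Tag = Tag;
  return D;
}

// Build all children first; a lexical block that ends up empty is dropped.
static std::unique_ptr<DIE> constructScope(const ScopeNode &S) {
  std::vector<std::unique_ptr<DIE>> Kids;
  for (const std::string &V : S.Variables)
    Kids.push_back(makeDIE("DW_TAG_variable " + V));
  for (const ScopeNode &C : S.Children)
    if (auto Nested = constructScope(C))
      Kids.push_back(std::move(Nested));

  if (S.IsLexicalBlock && Kids.empty())
    return nullptr;                     // no need to emit an empty block

  auto Die = makeDIE(S.IsLexicalBlock ? "DW_TAG_lexical_block"
                                      : "DW_TAG_subprogram");
  Die->Children = std::move(Kids);
  return Die;
}

int main() {
  // A function with one variable and one empty nested block.
  ScopeNode Fn{false, {"x"}, {ScopeNode{true, {}, {}}}};
  auto Die = constructScope(Fn);
  // The empty lexical block was dropped: only the variable child remains.
  return (Die && Die->Children.size() == 1) ? 0 : 1;
}
```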
- DIE *NestedDIE = constructScopeDIE(Scopes[j]); - if (NestedDIE) - ScopeDIE->addChild(NestedDIE); - } + // Add children + for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + ScopeDIE->addChild(*I); if (DS.isSubprogram()) addPubTypes(DISubprogram(DS)); @@ -1746,37 +1849,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){ - unsigned DId; - assert (DirName.empty() == false && "Invalid directory name!"); - StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); - if (DI != DirectoryIdMap.end()) { - DId = DI->getValue(); - } else { - DId = DirectoryNames.size() + 1; - DirectoryIdMap[DirName] = DId; - DirectoryNames.push_back(DirName); - } +unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>"); - unsigned FId; - StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName); - if (FI != SourceFileIdMap.end()) { - FId = FI->getValue(); - } else { - FId = SourceFileNames.size() + 1; - SourceFileIdMap[FileName] = FId; - SourceFileNames.push_back(FileName); - } + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); - DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI = - SourceIdMap.find(std::make_pair(DId, FId)); - if (SI != SourceIdMap.end()) - return SI->second; + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); - unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0. - SourceIdMap[std::make_pair(DId, FId)] = SrcId; - SourceIds.push_back(std::make_pair(DId, FId)); + // Print out a .file directive to specify files for .loc directives. + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName); return SrcId; } @@ -1802,7 +1889,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(Dir, FN); + unsigned ID = GetOrCreateSourceID(FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, @@ -1886,6 +1973,32 @@ static bool isUnsignedDIType(DIType Ty) { return false; } +// Return const exprssion if value is a GEP to access merged global +// constant. e.g. +// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return NULL; + + // First operand points to a global value. + if (!isa<GlobalValue>(CE->getOperand(0))) + return NULL; + + // Second operand is zero. + const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return NULL; + + // Third operand is offset. + if (!isa<ConstantInt>(CE->getOperand(2))) + return NULL; + + return CE; +} + /// constructGlobalVariableDIE - Construct global variable DIE. 
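Aside (not part of the commit): the GetOrCreateSourceID rewrite above collapses the old directory/file pair bookkeeping into a single filename-keyed map whose IDs start at 1 (DW_AT_decl_file 0 means "no file"), emitting a `.file` directive the first time a name is seen and treating an empty name as stdin. A standalone sketch of that interning scheme with std::map as a stand-in (names hypothetical):

```cpp
#include <cstdio>
#include <map>
#include <string>

class SourceIdTable {
  std::map<std::string, unsigned> Ids;      // filename -> 1-based id
public:
  unsigned getOrCreate(std::string FileName) {
    if (FileName.empty())
      FileName = "<stdin>";                 // FE gave no name: assume stdin
    auto It = Ids.find(FileName);
    if (It != Ids.end())
      return It->second;                    // already assigned
    unsigned Id = unsigned(Ids.size()) + 1; // ids start at 1, never 0
    Ids.emplace(FileName, Id);
    // First time this file is seen: announce it to the assembler.
    std::printf("\t.file\t%u \"%s\"\n", Id, FileName.c_str());
    return Id;
  }
};

int main() {
  SourceIdTable T;
  T.getOrCreate("a.c");   // emits .file 1 "a.c"
  T.getOrCreate("b.c");   // emits .file 2 "b.c"
  T.getOrCreate("a.c");   // reuses id 1, no directive
  T.getOrCreate("");      // treated as "<stdin>", id 3
  return 0;
}
```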
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { DIGlobalVariable GV(N); @@ -1952,16 +2065,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } - } else if (Constant *C = GV.getConstant()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { - if (isUnsignedDIType(GTy)) - addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - } + } else if (ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(GV.getConstant())) + addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + // GV is a merged global. + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); + ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + return; } @@ -2043,25 +2162,12 @@ void DwarfDebug::beginModule(Module *M) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); - - // Print out .file directives to specify files for .loc directives. These are - // printed out early so that they precede any .loc directives. - if (Asm->MAI->hasDotLocAndDotFile()) { - for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) { - // Remember source id starts at 1. - std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i); - // FIXME: don't use sys::path for this! This should not depend on the - // host. - sys::Path FullPath(getSourceDirectoryName(Id.first)); - bool AppendOk = - FullPath.appendComponent(getSourceFileName(Id.second)); - assert(AppendOk && "Could not append filename to directory!"); - AppendOk = false; - Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str()); - } - } } /// endModule - Emit all Dwarf sections that should come after the content. @@ -2081,8 +2187,7 @@ void DwarfDebug::endModule() { StringRef FName = SP.getLinkageName(); if (FName.empty()) FName = SP.getName(); - NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName))); + NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName); if (!NMD) continue; unsigned E = NMD->getNumOperands(); if (!E) continue; @@ -2152,9 +2257,6 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit source line correspondence into a debug line section. - emitDebugLines(); - // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -2242,15 +2344,6 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, } } -/// isDbgValueInUndefinedReg - Return true if debug value, encoded by -/// DBG_VALUE instruction, is in undefined reg. 
-static bool isDbgValueInUndefinedReg(const MachineInstr *MI) { - assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg()) - return true; - return false; -} - /// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { @@ -2275,7 +2368,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn)) + if (!MInsn->isDebugValue()) continue; DbgValues.push_back(MInsn); } @@ -2297,19 +2390,18 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, ME = DbgValues.end(); MI != ME; ++MI) { const MDNode *Var = (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && isDbgValueInDefinedReg(*MI) && + if (Var == DV && !PrevMI->isIdenticalTo(*MI)) MultipleValues.push_back(*MI); PrevMI = *MI; } - DbgScope *Scope = findDbgScope(MInsn); - bool CurFnArg = false; + DbgScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) - CurFnArg = true; - if (!Scope && CurFnArg) Scope = CurrentFnDbgScope; + else + Scope = findDbgScope(MInsn); // If variable scope is not found then skip this variable. if (!Scope) continue; @@ -2317,8 +2409,6 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, Processed.insert(DV); DbgVariable *RegVar = new DbgVariable(DV); Scope->addVariable(RegVar); - if (!CurFnArg) - DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; VarToAbstractVarMap[RegVar] = AbsVar; @@ -2375,10 +2465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, // Collect info for variables that were optimized out. const Function *F = MF->getFunction(); - const Module *M = F->getParent(); - if (NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", - getRealLinkageName(F->getName())))) { + if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast<MDNode>(NMD->getOperand(i))); if (!DV || !Processed.insert(DV)) @@ -2409,8 +2496,8 @@ const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { return I->second; } -/// beginScope - Process beginning of a scope. -void DwarfDebug::beginScope(const MachineInstr *MI) { +/// beginInstruction - Process beginning of an instruction. +void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (InsnNeedsLabel.count(MI) == 0) { LabelsBeforeInsn[MI] = PrevLabel; return; @@ -2444,8 +2531,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { assert (0 && "Instruction is not processed!"); } -/// endScope - Process end of a scope. -void DwarfDebug::endScope(const MachineInstr *MI) { +/// endInstruction - Process end of an instruction. +void DwarfDebug::endInstruction(const MachineInstr *MI) { if (InsnsEndScopeSet.count(MI) != 0) { // Emit a label if this instruction ends a scope. MCSymbol *Label = MMI->getContext().CreateTempSymbol(); @@ -2624,6 +2711,10 @@ bool DwarfDebug::extractScopeInformation() { continue; } + // Ignore DBG_VALUE. It does not contribute any instruction in output. 
+ if (MInsn->isDebugValue()) + continue; + if (RangeBeginMI) { // If we have alread seen a beginning of a instruction range and // current instruction scope does not match scope of first instruction @@ -2727,12 +2818,37 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { return DebugLoc(); } +#ifndef NDEBUG +/// CheckLineNumbers - Count basicblocks whose instructions do not have any +/// line number information. +static void CheckLineNumbers(const MachineFunction *MF) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + bool FoundLineNo = false; + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MI = II; + if (!MI->getDebugLoc().isUnknown()) { + FoundLineNo = true; + break; + } + } + if (!FoundLineNo && I->size()) + ++BlocksWithoutLineNo; + } +} +#endif + /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; if (!extractScopeInformation()) return; +#ifndef NDEBUG + CheckLineNumbers(MF); +#endif + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. @@ -2775,16 +2891,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); if (!DV.Verify()) continue; // If DBG_VALUE is for a local variable then it needs a label. - if (DV.getTag() != dwarf::DW_TAG_arg_variable - && isDbgValueInUndefinedReg(MI) == false) + if (DV.getTag() != dwarf::DW_TAG_arg_variable) InsnNeedsLabel.insert(MI); // DBG_VALUE for inlined functions argument needs a label. else if (!DISubprogram(getDISubprogram(DV.getContext())). describes(MF->getFunction())) InsnNeedsLabel.insert(MI); // DBG_VALUE indicating argument location change needs a label. - else if (isDbgValueInUndefinedReg(MI) == false - && !ProcessedArgs.insert(DV)) + else if (!ProcessedArgs.insert(DV)) InsnNeedsLabel.insert(MI); } else { // If location is unknown then instruction needs a location only if @@ -2820,17 +2934,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { SmallPtrSet<const MDNode *, 16> ProcessedVars; collectVariableInfo(MF, ProcessedVars); - // Get function line info. - if (!Lines.empty()) { - // Get section line info. - unsigned ID = SectionMap.insert(Asm->getCurrentSection()); - if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); - std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; - // Append the function info to section info. - SectionLineInfos.insert(SectionLineInfos.end(), - Lines.begin(), Lines.end()); - } - // Construct abstract scopes. 
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), AE = AbstractScopesList.end(); AI != AE; ++AI) { @@ -2840,10 +2943,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { StringRef FName = SP.getLinkageName(); if (FName.empty()) FName = SP.getName(); - const Module *M = MF->getFunction()->getParent(); - if (NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", - getRealLinkageName(FName)))) { + if (NamedMDNode *NMD = + getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast<MDNode>(NMD->getOperand(i))); if (!DV || !ProcessedVars.insert(DV)) @@ -2875,7 +2976,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); - DbgVariableLabelsMap.clear(); DeleteContainerSeconds(DbgScopeMap); InsnsEndScopeSet.clear(); ConcreteScopes.clear(); @@ -2884,7 +2984,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { AbstractVariables.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); - Lines.clear(); PrevLabel = NULL; } @@ -2906,15 +3005,6 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) { return true; } -/// findVariableLabel - Find MCSymbol for the variable. -const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) { - DenseMap<const DbgVariable *, const MCSymbol *>::iterator I - = DbgVariableLabelsMap.find(V); - if (I == DbgVariableLabelsMap.end()) - return NULL; - else return I->second; -} - /// findDbgScope - Find DbgScope for the debug loc attached with an /// instruction. DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { @@ -2940,7 +3030,6 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// the source line list. MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { - StringRef Dir; StringRef Fn; unsigned Src = 1; @@ -2949,25 +3038,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, if (Scope.isCompileUnit()) { DICompileUnit CU(S); - Dir = CU.getDirectory(); Fn = CU.getFilename(); + } else if (Scope.isFile()) { + DIFile F(S); + Fn = F.getFilename(); } else if (Scope.isSubprogram()) { DISubprogram SP(S); - Dir = SP.getDirectory(); Fn = SP.getFilename(); } else if (Scope.isLexicalBlock()) { DILexicalBlock DB(S); - Dir = DB.getDirectory(); Fn = DB.getFilename(); } else assert(0 && "Unexpected scope info"); - Src = GetOrCreateSourceID(Dir, Fn); + Src = GetOrCreateSourceID(Fn); } - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, + 0, 0); + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Asm->OutStreamer.EmitLabel(Label); return Label; } @@ -3151,6 +3241,14 @@ void DwarfDebug::emitDIE(DIE *Die) { Values[i]->EmitValue(Asm, Form); break; } + case dwarf::DW_AT_accessibility: { + if (Asm->isVerbose()) { + DIEInteger *V = cast<DIEInteger>(Values[i]); + Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue())); + } + Values[i]->EmitValue(Asm, Form); + break; + } default: // Emit an attribute using the defined form. Values[i]->EmitValue(Asm, Form); @@ -3270,185 +3368,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitDebugLines - Emit source line information. 
-/// -void DwarfDebug::emitDebugLines() { - // If the target is using .loc/.file, the assembler will be emitting the - // .debug_line table automatically. - if (Asm->MAI->hasDotLocAndDotFile()) - return; - - // Minimum line delta, thus ranging from -10..(255-10). - const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1); - // Maximum line delta, thus ranging from -10..(255-10). - const int MaxLineDelta = 255 + MinLineDelta; - - // Start the dwarf line section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfLineSection()); - - // Construct the section header. - Asm->OutStreamer.AddComment("Length of Source Line Info"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"), - Asm->GetTempSymbol("line_begin"), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin")); - - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Prolog Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"), - Asm->GetTempSymbol("line_prolog_begin"), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin")); - - Asm->OutStreamer.AddComment("Minimum Instruction Length"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("Default is_stmt_start flag"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)"); - Asm->EmitInt8(MinLineDelta); - Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)"); - Asm->EmitInt8(MaxLineDelta); - Asm->OutStreamer.AddComment("Special Opcode Base"); - Asm->EmitInt8(-MinLineDelta); - - // Line number standard opcode encodings argument count - Asm->OutStreamer.AddComment("DW_LNS_copy arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_set_file arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_set_column arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count"); - Asm->EmitInt8(1); - - // Emit directories. - for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) { - const std::string &Dir = getSourceDirectoryName(DI); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory"); - Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0); - } - - Asm->OutStreamer.AddComment("End of directories"); - Asm->EmitInt8(0); - - // Emit files. - for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) { - // Remember source id starts at 1. - std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI); - const std::string &FN = getSourceFileName(Id.second); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source"); - Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0); - - Asm->EmitULEB128(Id.first, "Directory #"); - Asm->EmitULEB128(0, "Mod date"); - Asm->EmitULEB128(0, "File size"); - } - - Asm->OutStreamer.AddComment("End of files"); - Asm->EmitInt8(0); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end")); - - // A sequence for each text section. 
- unsigned SecSrcLinesSize = SectionSourceLines.size(); - - for (unsigned j = 0; j < SecSrcLinesSize; ++j) { - // Isolate current sections line info. - const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j]; - - // Dwarf assumes we start with first line of first source file. - unsigned Source = 1; - unsigned Line = 1; - - // Construct rows of the address, source, line, column matrix. - for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) { - const SrcLineInfo &LineInfo = LineInfos[i]; - MCSymbol *Label = LineInfo.getLabel(); - if (!Label->isDefined()) continue; // Not emitted, in dead code. - - if (Asm->isVerbose()) { - std::pair<unsigned, unsigned> SrcID = - getSourceDirectoryAndFileIds(LineInfo.getSourceID()); - Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) + - "/" + - Twine(getSourceFileName(SrcID.second)) + - ":" + Twine(LineInfo.getLine())); - } - - // Define the line address. - Asm->OutStreamer.AddComment("Extended Op"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); - - Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); - - Asm->OutStreamer.AddComment("Location label"); - Asm->OutStreamer.EmitSymbolValue(Label, - Asm->getTargetData().getPointerSize(), - 0/*AddrSpace*/); - - // If change of source, then switch to the new source. - if (Source != LineInfo.getSourceID()) { - Source = LineInfo.getSourceID(); - Asm->OutStreamer.AddComment("DW_LNS_set_file"); - Asm->EmitInt8(dwarf::DW_LNS_set_file); - Asm->EmitULEB128(Source, "New Source"); - } - - // If change of line. - if (Line != LineInfo.getLine()) { - // Determine offset. - int Offset = LineInfo.getLine() - Line; - int Delta = Offset - MinLineDelta; - - // Update line. - Line = LineInfo.getLine(); - - // If delta is small enough and in range... - if (Delta >= 0 && Delta < (MaxLineDelta - 1)) { - // ... then use fast opcode. - Asm->OutStreamer.AddComment("Line Delta"); - Asm->EmitInt8(Delta - MinLineDelta); - } else { - // ... otherwise use long hand. - Asm->OutStreamer.AddComment("DW_LNS_advance_line"); - Asm->EmitInt8(dwarf::DW_LNS_advance_line); - Asm->EmitSLEB128(Offset, "Line Offset"); - Asm->OutStreamer.AddComment("DW_LNS_copy"); - Asm->EmitInt8(dwarf::DW_LNS_copy); - } - } else { - // Copy the previous row (different address or source) - Asm->OutStreamer.AddComment("DW_LNS_copy"); - Asm->EmitInt8(dwarf::DW_LNS_copy); - } - } - - emitEndOfLineMatrix(j + 1); - } - - if (SecSrcLinesSize == 0) - // Because we're emitting a debug_line section, we still need a line - // table. The linker and friends expect it to exist. If there's nothing to - // put into it, emit an empty table. - emitEndOfLineMatrix(1); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end")); -} - /// emitCommonDebugFrame - Emit common frame info into a debug frame section. /// void DwarfDebug::emitCommonDebugFrame() { @@ -3456,8 +3375,8 @@ void DwarfDebug::emitCommonDebugFrame() { return; int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameInfo()->getStackGrowthDirection() == - TargetFrameInfo::StackGrowsDown) + if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == + TargetFrameLowering::StackGrowsDown) stackGrowth *= -1; // Start the dwarf frame section. 
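The hand-written .debug_line emitter removed above has no replacement in this file; line-table generation is delegated to the MC streamer through .file and .loc directives. As a condensed restatement of the new path, using only calls that appear elsewhere in this patch (the surrounding control flow is a sketch, not code from the patch):

    // One source id per file name; the directory component is no longer tracked.
    StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
    if (!Entry.getValue()) {
      Entry.setValue(SourceIdMap.size());
      // Emits ".file <id> <name>" so that later ".loc" directives can reference it.
      Asm->OutStreamer.EmitDwarfFileDirective(Entry.getValue(), FileName);
    }
    // For each source-position change, emit ".loc <id> <line> <col>" plus a label.
    Asm->OutStreamer.EmitDwarfLocDirective(Entry.getValue(), Line, Col,
                                            DWARF2_FLAG_IS_STMT, 0, 0);
    Asm->OutStreamer.EmitLabel(MMI->getContext().CreateTempSymbol());

The assembler (or the streamer's object emission) then builds the .debug_line section from these directives, which is why emitDebugLines() and the SrcLineInfo/SectionSourceLines bookkeeping can be dropped.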
@@ -3480,10 +3399,11 @@ void DwarfDebug::emitCommonDebugFrame() { Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); Asm->OutStreamer.AddComment("CIE RA Column"); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); std::vector<MachineMove> Moves; - RI->getInitialFrameState(Moves); + TFI->getInitialFrameState(Moves); Asm->EmitFrameMoves(Moves, 0, false); @@ -3667,6 +3587,14 @@ void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; + for (SmallVector<DotDebugLocEntry, 4>::iterator + I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); + I != E; ++I) { + DotDebugLocEntry &Entry = *I; + if (I + 1 != DotDebugLocEntries.end()) + Entry.Merge(I+1); + } + // Start the dwarf loc section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); @@ -3676,7 +3604,8 @@ void DwarfDebug::emitDebugLoc() { for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I, ++index) { - DotDebugLocEntry Entry = *I; + DotDebugLocEntry &Entry = *I; + if (Entry.isMerged()) continue; if (Entry.isEmpty()) { Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index f0ff3bc71699..7df0510fbfba 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -23,6 +23,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/DebugLoc.h" namespace llvm { @@ -51,6 +52,8 @@ class DIType; class DINameSpace; class DISubrange; class DICompositeType; +class DITemplateTypeParameter; +class DITemplateValueParameter; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. @@ -71,6 +74,28 @@ public: MCSymbol *getLabel() const { return Label; } }; +/// DotDebugLocEntry - This struct describes location entries emitted in +/// .debug_loc section. +typedef struct DotDebugLocEntry { + const MCSymbol *Begin; + const MCSymbol *End; + MachineLocation Loc; + bool Merged; + DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) + : Begin(B), End(E), Loc(L), Merged(false) {} + /// Empty entries are also used as a trigger to emit temp label. Such + /// labels are referenced is used to find debug_loc offset for a given DIE. + bool isEmpty() { return Begin == 0 && End == 0; } + bool isMerged() { return Merged; } + void Merge(DotDebugLocEntry *Next) { + if (!(Begin && Loc == Next->Loc && End == Next->Begin)) + return; + Next->Begin = Begin; + Merged = true; + } +} DotDebugLocEntry; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -93,30 +118,9 @@ class DwarfDebug { /// std::vector<DIEAbbrev *> Abbreviations; - /// DirectoryIdMap - Directory name to directory id map. - /// - StringMap<unsigned> DirectoryIdMap; - - /// DirectoryNames - A list of directory names. - SmallVector<std::string, 8> DirectoryNames; - - /// SourceFileIdMap - Source file name to source file id map. - /// - StringMap<unsigned> SourceFileIdMap; - - /// SourceFileNames - A list of source file names. 
- SmallVector<std::string, 8> SourceFileNames; - /// SourceIdMap - Source id map, i.e. pair of directory id and source file /// id mapped to a unique id. - DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap; - - /// SourceIds - Reverse map from source id to directory id + file id pair. - /// - SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds; - - /// Lines - List of source line correspondence. - std::vector<SrcLineInfo> Lines; + StringMap<unsigned> SourceIdMap; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -135,10 +139,6 @@ class DwarfDebug { /// UniqueVector<const MCSection*> SectionMap; - /// SectionSourceLines - Tracks line numbers per text section. - /// - std::vector<std::vector<SrcLineInfo> > SectionSourceLines; - // CurrentFnDbgScope - Top level scope for the current function. // DbgScope *CurrentFnDbgScope; @@ -175,23 +175,6 @@ class DwarfDebug { /// machine instruction. DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap; - /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol. - DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap; - - /// DotDebugLocEntry - This struct describes location entries emitted in - /// .debug_loc section. - typedef struct DotDebugLocEntry { - const MCSymbol *Begin; - const MCSymbol *End; - MachineLocation Loc; - DotDebugLocEntry() : Begin(0), End(0) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, - MachineLocation &L) : Begin(B), End(E), Loc(L) {} - /// Empty entries are also used as a trigger to emit temp label. Such - /// labels are referenced is used to find debug_loc offset for a given DIE. - bool isEmpty() { return Begin == 0 && End == 0; } - } DotDebugLocEntry; - /// DotDebugLocEntries - Collection of DotDebugLocEntry. SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; @@ -265,35 +248,10 @@ class DwarfDebug { DIEInteger *DIEIntegerOne; private: - - /// getSourceDirectoryAndFileIds - Return the directory and file ids that - /// maps to the source id. Source id starts at 1. - std::pair<unsigned, unsigned> - getSourceDirectoryAndFileIds(unsigned SId) const { - return SourceIds[SId-1]; - } - - /// getNumSourceDirectories - Return the number of source directories in the - /// debug info. - unsigned getNumSourceDirectories() const { - return DirectoryNames.size(); - } - - /// getSourceDirectoryName - Return the name of the directory corresponding - /// to the id. - const std::string &getSourceDirectoryName(unsigned Id) const { - return DirectoryNames[Id - 1]; - } - - /// getSourceFileName - Return the name of the source file corresponding - /// to the id. - const std::string &getSourceFileName(unsigned Id) const { - return SourceFileNames[Id - 1]; - } /// getNumSourceIds - Return the number of unique source ids. unsigned getNumSourceIds() const { - return SourceIds.size(); + return SourceIdMap.size(); } /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -349,13 +307,14 @@ private: const MachineLocation &Location); /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); + bool addRegisterAddress(DIE *Die, const MachineOperand &MO); /// addConstantValue - Add constant value entry in variable DIE. 
- bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); + bool addConstantValue(DIE *Die, const MachineOperand &MO); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable @@ -393,6 +352,14 @@ private: /// given DIType. DIE *getOrCreateTypeDIE(DIType Ty); + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. + DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + void addPubTypes(DISubprogram SP); /// constructTypeDIE - Construct basic type die from DIBasicType. @@ -421,7 +388,7 @@ private: DIE *createMemberDIE(DIDerivedType DT); /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false); + DIE *createSubprogramDIE(DISubprogram SP); /// getOrCreateDbgScope - Create DbgScope for the scope. DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); @@ -481,10 +448,6 @@ private: /// void emitEndOfLineMatrix(unsigned SectionEnd); - /// emitDebugLines - Emit source line information. - /// - void emitDebugLines(); - /// emitCommonDebugFrame - Emit common frame info into a debug frame section. /// void emitCommonDebugFrame(); @@ -543,9 +506,8 @@ private: /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it - /// in the SourceIds map. This can update DirectoryNames and SourceFileNames - /// maps as well. - unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); + /// in the SourceIds map. + unsigned GetOrCreateSourceID(StringRef FullName); /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. @@ -565,12 +527,6 @@ private: /// the source line list. MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); - /// getSourceLineCount - Return the number of source lines in the debug - /// info. - unsigned getSourceLineCount() const { - return Lines.size(); - } - /// recordVariableFrameIndex - Record a variable's index. void recordVariableFrameIndex(const DbgVariable *V, int Index); @@ -578,9 +534,6 @@ private: /// is found. Update FI to hold value of the index. bool findVariableFrameIndex(const DbgVariable *V, int *FI); - /// findVariableLabel - Find MCSymbol for the variable. - const MCSymbol *findVariableLabel(const DbgVariable *V); - /// findDbgScope - Find DbgScope for the debug loc attached with an /// instruction. DbgScope *findDbgScope(const MachineInstr *MI); @@ -630,11 +583,11 @@ public: /// getLabelAfterInsn - Return Label immediately following the instruction. const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// beginScope - Process beginning of a scope. - void beginScope(const MachineInstr *MI); + /// beginInstruction - Process beginning of an instruction. 
+ void beginInstruction(const MachineInstr *MI); - /// endScope - Prcess end of a scope. - void endScope(const MachineInstr *MI); + /// endInstruction - Prcess end of an instruction. + void endInstruction(const MachineInstr *MI); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 86a368831e0e..967a2783da14 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -26,7 +26,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -39,238 +39,10 @@ using namespace llvm; DwarfException::DwarfException(AsmPrinter *A) - : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false) {} + : Asm(A), MMI(Asm->MMI) {} DwarfException::~DwarfException() {} -/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that -/// is shared among many Frame Description Entries. There is at least one CIE -/// in every non-empty .debug_frame section. -void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { - // Size and sign of stack growth. - int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameInfo()->getStackGrowthDirection() == - TargetFrameInfo::StackGrowsDown) - stackGrowth *= -1; - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - MCSymbol *EHFrameSym; - if (TLOF.isFunctionEHFrameSymbolPrivate()) - EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); - else - EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + - Twine(Index)); - Asm->OutStreamer.EmitLabel(EHFrameSym); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); - - // Define base labels. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); - - // Define the eh frame length. - Asm->OutStreamer.AddComment("Length of Common Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), - Asm->GetTempSymbol("eh_frame_common_begin", Index), - 4); - - // EH frame header. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); - Asm->OutStreamer.AddComment("CIE Identifier Tag"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - Asm->OutStreamer.AddComment("DW_CIE_VERSION"); - Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); - - // The personality presence indicates that language specific information will - // show up in the eh frame. Find out how we are supposed to lower the - // personality function reference: - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - - char Augmentation[6] = { 0 }; - unsigned AugmentationSize = 0; - char *APtr = Augmentation + 1; - - if (PersonalityFn) { - // There is a personality function. - *APtr++ = 'P'; - AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); - } - - if (UsesLSDA[Index]) { - // An LSDA pointer is in the FDE augmentation. 
- *APtr++ = 'L'; - ++AugmentationSize; - } - - if (FDEEncoding != dwarf::DW_EH_PE_absptr) { - // A non-default pointer encoding for the FDE. - *APtr++ = 'R'; - ++AugmentationSize; - } - - if (APtr != Augmentation + 1) - Augmentation[0] = 'z'; - - Asm->OutStreamer.AddComment("CIE Augmentation"); - Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); - - // Round out reader. - Asm->EmitULEB128(1, "CIE Code Alignment Factor"); - Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); - Asm->OutStreamer.AddComment("CIE Return Address Column"); - - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); - - if (Augmentation[0]) { - Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); - - // If there is a personality, we need to indicate the function's location. - if (PersonalityFn) { - Asm->EmitEncodingByte(PerEncoding, "Personality"); - Asm->OutStreamer.AddComment("Personality"); - Asm->EmitReference(PersonalityFn, PerEncoding); - } - if (UsesLSDA[Index]) - Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); - if (FDEEncoding != dwarf::DW_EH_PE_absptr) - Asm->EmitEncodingByte(FDEEncoding, "FDE"); - } - - // Indicate locations of general callee saved registers in frame. - std::vector<MachineMove> Moves; - RI->getInitialFrameState(Moves); - Asm->EmitFrameMoves(Moves, 0, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it must - // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get - // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); -} - -/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. -void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && - "Should not emit 'available externally' functions at all"); - - const Function *TheFunc = EHFrameInfo.function; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - // Externally visible entry into the functions eh frame info. If the - // corresponding function is static, this should not be externally visible. - if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global); - - // If corresponding function is weak definition, this should be too. - if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_WeakDefinition); - - // If corresponding function is hidden, this should be too. - if (TheFunc->hasHiddenVisibility()) - if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - HiddenAttr); - - // If there are no calls then you can't unwind. This may mean we can omit the - // EH Frame, but some environments do not handle weak absolute symbols. If - // UnwindTablesMandatory is set we cannot do this optimization; the unwind - // info is to be available for non-EH uses. 
- if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && - (!TheFunc->isWeakForLinker() || - !Asm->MAI->getWeakDefDirective() || - TLOF.getSupportsWeakOmittedEHFrame())) { - Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym, - MCConstantExpr::Create(0, Asm->OutContext)); - // This name has no connection to the function, so it might get - // dead-stripped when the function is not, erroneously. Prohibit - // dead-stripping unconditionally. - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } else { - Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym); - - // EH frame header. - Asm->OutStreamer.AddComment("Length of Frame Information Entry"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin", - EHFrameInfo.Number)); - - Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_common", - EHFrameInfo.PersonalityIndex), 4); - - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number); - - Asm->OutStreamer.AddComment("FDE initial location"); - Asm->EmitReference(EHFuncBeginSym, FDEEncoding); - - Asm->OutStreamer.AddComment("FDE address range"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end", - EHFrameInfo.Number), - EHFuncBeginSym, - Asm->GetSizeOfEncodedValue(FDEEncoding)); - - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (MMI->getPersonalities()[0] != NULL) { - unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding); - - Asm->EmitULEB128(Size, "Augmentation size"); - Asm->OutStreamer.AddComment("Language Specific Data Area"); - if (EHFrameInfo.hasLandingPads) - Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number), - LSDAEncoding); - else - Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); - - } else { - Asm->EmitULEB128(0, "Augmentation size"); - } - - // Indicate locations of function specific callee saved registers in frame. - Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it - // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you - // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", - EHFrameInfo.Number)); - - // If the function is marked used, this table should be also. We cannot - // make the mark unconditional in this case, since retaining the table also - // retains the function in this case, and there is code around that depends - // on unused functions (calling undefined externals) being dead-stripped to - // link correctly. Yes, there really is. - if (MMI->isUsedFunction(EHFrameInfo.function)) - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } - Asm->OutStreamer.AddBlankLine(); -} - /// SharedTypeIds - How many leading type ids two landing pads have in common. 
unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, const LandingPadInfo *R) { @@ -422,7 +194,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(I); if (!MO.isGlobal()) continue; - + const Function *F = dyn_cast<Function>(MO.getGlobal()); if (F == 0) continue; @@ -430,7 +202,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { // Be conservative. If we have more than one function operand for this // call, then we can't make the assumption that it's the callee and // not a parameter to the call. - // + // // FIXME: Determine if there's a way to say that `F' is the callee or // parameter. MarkedNoUnwind = false; @@ -497,8 +269,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. - if (SawPotentiallyThrowing && - Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; @@ -520,8 +291,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, }; // Try to merge with the previous call-site. SJLJ doesn't do this - if (PreviousIsInvoke && - Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) { CallSiteEntry &Prev = CallSites.back(); if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry. @@ -531,7 +301,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, } // Otherwise, create a new call-site. - if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) + if (Asm->MAI->isExceptionHandlingDwarf()) CallSites.push_back(Site); else { // SjLj EH must maintain the call sites in the order assigned @@ -549,8 +319,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && - Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { CallSiteEntry Site = { LastLabel, 0, 0, 0 }; CallSites.push_back(Site); } @@ -620,7 +389,7 @@ void DwarfException::EmitExceptionTable() { // Call sites. bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - + unsigned CallSiteTableLength; if (IsSJLJ) CallSiteTableLength = 0; @@ -628,7 +397,7 @@ void DwarfException::EmitExceptionTable() { unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4 unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 - CallSiteTableLength = + CallSiteTableLength = CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); } @@ -656,15 +425,15 @@ void DwarfException::EmitExceptionTable() { // mode, this reference will require a relocation by the dynamic linker. // // Because of this, we have a couple of options: - // + // // 1) If we are in -static mode, we can always use an absolute reference // from the LSDA, because the static linker will resolve it. 
- // + // // 2) Otherwise, if the LSDA section is writable, we can output the direct // reference to the typeinfo and allow the dynamic linker to relocate // it. Since it is in a writable section, the dynamic linker won't // have a problem. - // + // // 3) Finally, if we're in PIC mode and the LDSA section isn't writable, // we need to use some form of indirection. For example, on Darwin, // we can output a statically-relocatable reference to a dyld stub. The @@ -682,11 +451,14 @@ void DwarfException::EmitExceptionTable() { } // Begin the exception table. - Asm->OutStreamer.SwitchSection(LSDASection); + // Sometimes we want not to emit the data into separate section (e.g. ARM + // EHABI). In this case LSDASection will be NULL. + if (LSDASection) + Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2); // Emit the LSDA. - MCSymbol *GCCETSym = + MCSymbol *GCCETSym = Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); Asm->OutStreamer.EmitLabel(GCCETSym); @@ -764,7 +536,7 @@ void DwarfException::EmitExceptionTable() { } } else { // DWARF Exception handling - assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf); + assert(Asm->MAI->isExceptionHandlingDwarf()); // The call-site table is a list of all call sites that may throw an // exception (including C++ 'throw' statements) in the procedure @@ -793,23 +565,23 @@ void DwarfException::EmitExceptionTable() { for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; - + MCSymbol *EHFuncBeginSym = Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - + MCSymbol *BeginLabel = S.BeginLabel; if (BeginLabel == 0) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; if (EndLabel == 0) EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - + // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. Asm->OutStreamer.AddComment("Region start"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); - + Asm->OutStreamer.AddComment("Region length"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); @@ -834,7 +606,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment("-- Action Record Table --"); Asm->OutStreamer.AddBlankLine(); } - + for (SmallVectorImpl<ActionEntry>::const_iterator I = Actions.begin(), E = Actions.end(); I != E; ++I) { const ActionEntry &Action = *I; @@ -888,73 +660,17 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { - if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf) - return; - - if (!shouldEmitMovesModule && !shouldEmitTableModule) - return; - - const std::vector<const Function*> &Personalities = MMI->getPersonalities(); - - for (unsigned I = 0, E = Personalities.size(); I < E; ++I) - EmitCIE(Personalities[I], I); - - for (std::vector<FunctionEHFrameInfo>::iterator - I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) - EmitFDE(*I); + assert(0 && "Should be implemented"); } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. 
void DwarfException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; - - // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); - - // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; - - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); - - shouldEmitTableModule |= shouldEmitTable; - shouldEmitMovesModule |= shouldEmitMoves; + assert(0 && "Should be implemented"); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Record if this personality index uses a landing pad. - bool HasLandingPad = !MMI->getLandingPads().empty(); - UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad; - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - - if (HasLandingPad) - EmitExceptionTable(); - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - MCSymbol *FunctionEHSym = - Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh", - TLOF.isFunctionEHFrameSymbolPrivate()); - - // Save EH frame information - EHFrames. - push_back(FunctionEHFrameInfo(FunctionEHSym, - Asm->getFunctionNumber(), - MMI->getPersonalityIndex(), - Asm->MF->getFrameInfo()->adjustsStack(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - Asm->MF->getFunction())); + assert(0 && "Should be implemented"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index bc311e67054e..a172e53f8ac7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -35,60 +35,13 @@ class AsmPrinter; /// DwarfException - Emits Dwarf exception handling directives. /// class DwarfException { +protected: /// Asm - Target of Dwarf emission. AsmPrinter *Asm; /// MMI - Collected machine module information. MachineModuleInfo *MMI; - struct FunctionEHFrameInfo { - MCSymbol *FunctionEHSym; // L_foo.eh - unsigned Number; - unsigned PersonalityIndex; - bool adjustsStack; - bool hasLandingPads; - std::vector<MachineMove> Moves; - const Function *function; - - FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P, - bool hC, bool hL, - const std::vector<MachineMove> &M, - const Function *f): - FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), - adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } - }; - - std::vector<FunctionEHFrameInfo> EHFrames; - - /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index - /// uses an LSDA. If so, then we need to encode that information in the CIE's - /// augmentation. - DenseMap<unsigned, bool> UsesLSDA; - - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; - - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. - bool shouldEmitMoves; - - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; - - /// shouldEmitFrameModule - Per-module flag to indicate if frame moves - /// should be emitted. 
- bool shouldEmitMovesModule; - - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information - /// that is shared among many Frame Description Entries. There is at least - /// one CIE in every non-empty .debug_frame section. - void EmitCIE(const Function *Personality, unsigned Index); - - /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. - void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); - /// EmitExceptionTable - Emit landing pads and actions. /// /// The general organization of the table is complex, but the basic concepts @@ -172,18 +125,116 @@ public: // Main entry points. // DwarfException(AsmPrinter *A); - ~DwarfException(); + virtual ~DwarfException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +class DwarfCFIException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfCFIException(AsmPrinter *A); + virtual ~DwarfCFIException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +class DwarfTableException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; + + /// shouldEmitMovesModule - Per-module flag to indicate if frame moves + /// should be emitted. + bool shouldEmitMovesModule; + + struct FunctionEHFrameInfo { + MCSymbol *FunctionEHSym; // L_foo.eh + unsigned Number; + unsigned PersonalityIndex; + bool adjustsStack; + bool hasLandingPads; + std::vector<MachineMove> Moves; + const Function *function; + + FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P, + bool hC, bool hL, + const std::vector<MachineMove> &M, + const Function *f): + FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), + adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } + }; + + std::vector<FunctionEHFrameInfo> EHFrames; + + /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index + /// uses an LSDA. If so, then we need to encode that information in the CIE's + /// augmentation. + DenseMap<unsigned, bool> UsesLSDA; + + /// EmitCIE - Emit a Common Information Entry (CIE). 
This holds information + /// that is shared among many Frame Description Entries. There is at least + /// one CIE in every non-empty .debug_frame section. + void EmitCIE(const Function *Personality, unsigned Index); + + /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. + void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfTableException(AsmPrinter *A); + virtual ~DwarfTableException(); /// EndModule - Emit all exception information that should come after the /// content. - void EndModule(); + virtual void EndModule(); /// BeginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - void BeginFunction(const MachineFunction *MF); + virtual void BeginFunction(const MachineFunction *MF); /// EndFunction - Gather and emit post-function exception information. - void EndFunction(); + virtual void EndFunction(); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp new file mode 100644 index 000000000000..751901183cd0 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp @@ -0,0 +1,349 @@ +//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// The implementation emits all the necessary tables "by hands". +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +DwarfTableException::DwarfTableException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), + shouldEmitTableModule(false), shouldEmitMovesModule(false) {} + +DwarfTableException::~DwarfTableException() {} + +/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that +/// is shared among many Frame Description Entries. There is at least one CIE +/// in every non-empty .debug_frame section. +void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) { + // Size and sign of stack growth. 
+ int stackGrowth = Asm->getTargetData().getPointerSize(); + if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == + TargetFrameLowering::StackGrowsDown) + stackGrowth *= -1; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Begin eh frame section. + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + MCSymbol *EHFrameSym; + if (TLOF.isFunctionEHFrameSymbolPrivate()) + EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); + else + EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + + Twine(Index)); + Asm->OutStreamer.EmitLabel(EHFrameSym); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); + + // Define base labels. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); + + // Define the eh frame length. + Asm->OutStreamer.AddComment("Length of Common Information Entry"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), + Asm->GetTempSymbol("eh_frame_common_begin", Index), + 4); + + // EH frame header. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); + Asm->OutStreamer.AddComment("CIE Identifier Tag"); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + Asm->OutStreamer.AddComment("DW_CIE_VERSION"); + Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); + + // The personality presence indicates that language specific information will + // show up in the eh frame. Find out how we are supposed to lower the + // personality function reference: + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + char Augmentation[6] = { 0 }; + unsigned AugmentationSize = 0; + char *APtr = Augmentation + 1; + + if (PersonalityFn) { + // There is a personality function. + *APtr++ = 'P'; + AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); + } + + if (UsesLSDA[Index]) { + // An LSDA pointer is in the FDE augmentation. + *APtr++ = 'L'; + ++AugmentationSize; + } + + if (FDEEncoding != dwarf::DW_EH_PE_absptr) { + // A non-default pointer encoding for the FDE. + *APtr++ = 'R'; + ++AugmentationSize; + } + + if (APtr != Augmentation + 1) + Augmentation[0] = 'z'; + + Asm->OutStreamer.AddComment("CIE Augmentation"); + Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); + + // Round out reader. + Asm->EmitULEB128(1, "CIE Code Alignment Factor"); + Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); + Asm->OutStreamer.AddComment("CIE Return Address Column"); + + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); + + if (Augmentation[0]) { + Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); + + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + Asm->EmitEncodingByte(PerEncoding, "Personality"); + Asm->OutStreamer.AddComment("Personality"); + Asm->EmitReference(PersonalityFn, PerEncoding); + } + if (UsesLSDA[Index]) + Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + Asm->EmitEncodingByte(FDEEncoding, "FDE"); + } + + // Indicate locations of general callee saved registers in frame. 
+ std::vector<MachineMove> Moves; + TFI->getInitialFrameState(Moves); + Asm->EmitFrameMoves(Moves, 0, true); + + // On Darwin the linker honors the alignment of eh_frame, which means it must + // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get + // holes which confuse readers of eh_frame. + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); +} + +/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. +void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && + "Should not emit 'available externally' functions at all"); + + const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + // Externally visible entry into the functions eh frame info. If the + // corresponding function is static, this should not be externally visible. + if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global); + + // If corresponding function is weak definition, this should be too. + if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + MCSA_WeakDefinition); + + // If corresponding function is hidden, this should be too. + if (TheFunc->hasHiddenVisibility()) + if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + HiddenAttr); + + // If there are no calls then you can't unwind. This may mean we can omit the + // EH Frame, but some environments do not handle weak absolute symbols. If + // UnwindTablesMandatory is set we cannot do this optimization; the unwind + // info is to be available for non-EH uses. + if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && + (!TheFunc->isWeakForLinker() || + !Asm->MAI->getWeakDefDirective() || + TLOF.getSupportsWeakOmittedEHFrame())) { + Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym, + MCConstantExpr::Create(0, Asm->OutContext)); + // This name has no connection to the function, so it might get + // dead-stripped when the function is not, erroneously. Prohibit + // dead-stripping unconditionally. + if (Asm->MAI->hasNoDeadStrip()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + MCSA_NoDeadStrip); + } else { + Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym); + + // EH frame header. 
+ Asm->OutStreamer.AddComment("Length of Frame Information Entry"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number), + Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin", + EHFrameInfo.Number)); + + Asm->OutStreamer.AddComment("FDE CIE offset"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), + Asm->GetTempSymbol("eh_frame_common", + EHFrameInfo.PersonalityIndex), 4); + + MCSymbol *EHFuncBeginSym = + Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number); + + Asm->OutStreamer.AddComment("FDE initial location"); + Asm->EmitReference(EHFuncBeginSym, FDEEncoding); + + Asm->OutStreamer.AddComment("FDE address range"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end", + EHFrameInfo.Number), + EHFuncBeginSym, + Asm->GetSizeOfEncodedValue(FDEEncoding)); + + // If there is a personality and landing pads then point to the language + // specific data area in the exception table. + if (MMI->getPersonalities()[0] != NULL) { + unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding); + + Asm->EmitULEB128(Size, "Augmentation size"); + Asm->OutStreamer.AddComment("Language Specific Data Area"); + if (EHFrameInfo.hasLandingPads) + Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number), + LSDAEncoding); + else + Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); + + } else { + Asm->EmitULEB128(0, "Augmentation size"); + } + + // Indicate locations of function specific callee saved registers in frame. + Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true); + + // On Darwin the linker honors the alignment of eh_frame, which means it + // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you + // get holes which confuse readers of eh_frame. + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", + EHFrameInfo.Number)); + + // If the function is marked used, this table should be also. We cannot + // make the mark unconditional in this case, since retaining the table also + // retains the function in this case, and there is code around that depends + // on unused functions (calling undefined externals) being dead-stripped to + // link correctly. Yes, there really is. + if (MMI->isUsedFunction(EHFrameInfo.function)) + if (Asm->MAI->hasNoDeadStrip()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + MCSA_NoDeadStrip); + } + Asm->OutStreamer.AddBlankLine(); +} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfTableException::EndModule() { + if (!Asm->MAI->isExceptionHandlingDwarf()) + return; + + if (!shouldEmitMovesModule && !shouldEmitTableModule) + return; + + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + + for (unsigned I = 0, E = Personalities.size(); I < E; ++I) + EmitCIE(Personalities[I], I); + + for (std::vector<FunctionEHFrameInfo>::iterator + I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) + EmitFDE(*I); +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void DwarfTableException::BeginFunction(const MachineFunction *MF) { + shouldEmitTable = shouldEmitMoves = false; + + // If any landing pads survive, we need an EH table. 
+ shouldEmitTable = !MMI->getLandingPads().empty(); + + // See if we need frame move info. + shouldEmitMoves = + !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + + shouldEmitTableModule |= shouldEmitTable; + shouldEmitMovesModule |= shouldEmitMoves; +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfTableException::EndFunction() { + if (!shouldEmitMoves && !shouldEmitTable) return; + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Record if this personality index uses a landing pad. + bool HasLandingPad = !MMI->getLandingPads().empty(); + UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad; + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + if (HasLandingPad) + EmitExceptionTable(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + MCSymbol *FunctionEHSym = + Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh", + TLOF.isFunctionEHFrameSymbolPrivate()); + + // Save EH frame information + EHFrames. + push_back(FunctionEHFrameInfo(FunctionEHSym, + Asm->getFunctionNumber(), + MMI->getPersonalityIndex(), + Asm->MF->getFrameInfo()->adjustsStack(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + Asm->MF->getFunction())); +} diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index c8a63cf2393b..115381767751 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include <cctype> using namespace llvm; namespace { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2ef115dbd205..d7d0e1b3812b 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,15 +1,19 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp + AllocationOrder.cpp Analysis.cpp BranchFolding.cpp CalcSpillWeights.cpp CallingConvLower.cpp + CodeGen.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp + EdgeBundles.cpp ELFCodeEmitter.cpp ELFWriter.cpp + ExpandISelPseudos.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -18,10 +22,13 @@ add_llvm_library(LLVMCodeGen IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp + LiveDebugVariables.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp + LiveIntervalUnion.cpp LiveStackAnalysis.cpp LiveVariables.cpp + LiveRangeEdit.cpp LocalStackSlotAllocation.cpp LowerSubregs.cpp MachineBasicBlock.cpp @@ -34,6 +41,7 @@ add_llvm_library(LLVMCodeGen MachineInstr.cpp MachineLICM.cpp MachineLoopInfo.cpp + MachineLoopRanges.cpp MachineModuleInfo.cpp MachineModuleInfoImpls.cpp MachinePassRegistry.cpp @@ -45,15 +53,17 @@ add_llvm_library(LLVMCodeGen OcamlGC.cpp OptimizePHIs.cpp PHIElimination.cpp + PHIEliminationUtils.cpp Passes.cpp PeepholeOptimizer.cpp - PostRAHazardRecognizer.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocBasic.cpp RegAllocFast.cpp + RegAllocGreedy.cpp RegAllocLinearScan.cpp RegAllocPBQP.cpp RegisterCoalescer.cpp @@ -63,12 +73,14 @@ add_llvm_library(LLVMCodeGen 
ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp + ScoreboardHazardRecognizer.cpp ShadowStackGC.cpp ShrinkWrapping.cpp SimpleRegisterCoalescing.cpp SjLjEHPrepare.cpp SlotIndexes.cpp Spiller.cpp + SpillPlacement.cpp SplitKit.cpp Splitter.cpp StackProtector.cpp @@ -83,4 +95,5 @@ add_llvm_library(LLVMCodeGen VirtRegRewriter.cpp ) -target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts) +add_subdirectory(SelectionDAG) +add_subdirectory(AsmPrinter) diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 1b7e08a8b6bb..76bb3d148b0b 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -25,8 +25,12 @@ using namespace llvm; char CalculateSpillWeights::ID = 0; -INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false); +INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false) void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); @@ -170,8 +174,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { totalWeight *= 0.5F; } - li.weight = totalWeight; - lis_.normalizeSpillWeight(li); + li.weight = normalizeSpillWeight(totalWeight, li.getSize()); } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { @@ -218,7 +221,7 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { if (rc == orc) return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to " - << rc->getName() <<".\n"); + DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) + << " to " << rc->getName() <<".\n"); mri.setRegClass(reg, rc); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 62ad8171a9d4..2ad80b4d3a75 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -34,8 +34,8 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, // HandleByVal - Allocate a stack slot large enough to pass an argument by // value. The size and alignment information of the argument is encoded in its // parameter attribute. -void CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags) { unsigned Align = ArgFlags.getByValAlign(); @@ -51,11 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, EVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. 
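The CalculateSpillWeights hunk above shows the registration pattern this commit applies across CodeGen: the single INITIALIZE_PASS macro becomes a BEGIN/DEPENDENCY/END triple so required analyses are declared up front, and pass constructors call their initialize hook so registration runs even when a pass object is created directly. Schematically, for a hypothetical FooPass (not a pass in this commit), the shape is:

    // Registration with explicit dependencies (replaces the old one-shot macro).
    INITIALIZE_PASS_BEGIN(FooPass, "foo-pass", "Foo Pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    INITIALIZE_PASS_END(FooPass, "foo-pass", "Foo Pass", false, false)

    // The constructor makes sure the registration above has actually run before
    // the pass is used, even when it is instantiated with 'new' rather than
    // through the pass manager.
    FooPass::FooPass() : MachineFunctionPass(ID) {
      initializeFooPassPass(*PassRegistry::getPassRegistry());
    }

The explicit DEPENDENCY lines declare the analyses a pass relies on, which is why so many constructors in this commit gain an initialize*Pass call of this form.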
void CCState::MarkAllocated(unsigned Reg) { - UsedRegs[Reg/32] |= 1 << (Reg&31); - - if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) - for (; (Reg = *RegAliases); ++RegAliases) - UsedRegs[Reg/32] |= 1 << (Reg&31); + for (const unsigned *Alias = TRI.getOverlaps(Reg); + unsigned Reg = *Alias; ++Alias) + UsedRegs[Reg/32] |= 1 << (Reg&31); } /// AnalyzeFormalArguments - Analyze an array of argument values, @@ -66,12 +64,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, unsigned NumArgs = Ins.size(); for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; + MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString(); + << EVT(ArgVT).getEVTString(); #endif llvm_unreachable(0); } @@ -84,7 +82,7 @@ bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { // Determine which register each value should be copied into. for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; + MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) return false; @@ -98,12 +96,12 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { // Determine which register each value should be copied into. for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; + MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString(); + << EVT(VT).getEVTString(); #endif llvm_unreachable(0); } @@ -116,12 +114,12 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = Outs[i].VT; + MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); + << EVT(ArgVT).getEVTString(); #endif llvm_unreachable(0); } @@ -130,17 +128,17 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, /// AnalyzeCallOperands - Same as above except it takes vectors of types /// and argument flags. 
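The MarkAllocated rewrite above leans on getOverlaps returning a zero-terminated list that, unlike getAliasSet, already contains the register itself, so the separate handling of Reg before the alias loop goes away. The loop idiom works with any zero-terminated array; a self-contained toy version with made-up register numbers:

    #include <cstdio>

    int main() {
      // Pretend overlap list for some register: the register itself first, then
      // its aliasing registers, terminated by 0 (register 0 is never valid).
      static const unsigned Overlaps[] = { 17, 3, 42, 0 };

      // Same loop shape as MarkAllocated above: the declaration in the loop
      // condition binds each entry to Reg and stops the walk at the final 0.
      for (const unsigned *Alias = Overlaps; unsigned Reg = *Alias; ++Alias)
        std::printf("marking register %u\n", Reg);
      return 0;
    }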
-void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &Flags, CCAssignFn Fn) { unsigned NumOps = ArgVTs.size(); for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; + MVT ArgVT = ArgVTs[i]; ISD::ArgFlagsTy ArgFlags = Flags[i]; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); + << EVT(ArgVT).getEVTString(); #endif llvm_unreachable(0); } @@ -152,12 +150,12 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, CCAssignFn Fn) { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; + MVT VT = Ins[i].VT; ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString(); + << EVT(VT).getEVTString(); #endif llvm_unreachable(0); } @@ -166,11 +164,11 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, /// AnalyzeCallResult - Same as above except it's specialized for calls which /// produce a single value. -void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { +void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { #ifndef NDEBUG dbgs() << "Call result has unhandled type " - << VT.getEVTString(); + << EVT(VT).getEVTString(); #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp new file mode 100644 index 000000000000..515e6f9fde87 --- /dev/null +++ b/lib/CodeGen/CodeGen.cpp @@ -0,0 +1,61 @@ +//===-- CodeGen.cpp -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the +// CodeGen library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
+void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeCalculateSpillWeightsPass(Registry); + initializeDeadMachineInstructionElimPass(Registry); + initializeGCModuleInfoPass(Registry); + initializeIfConverterPass(Registry); + initializeLiveDebugVariablesPass(Registry); + initializeLiveIntervalsPass(Registry); + initializeLiveStacksPass(Registry); + initializeLiveVariablesPass(Registry); + initializeMachineCSEPass(Registry); + initializeMachineDominatorTreePass(Registry); + initializeMachineLICMPass(Registry); + initializeMachineLoopInfoPass(Registry); + initializeMachineModuleInfoPass(Registry); + initializeMachineSinkingPass(Registry); + initializeMachineVerifierPassPass(Registry); + initializeOptimizePHIsPass(Registry); + initializePHIEliminationPass(Registry); + initializePeepholeOptimizerPass(Registry); + initializePreAllocSplittingPass(Registry); + initializeProcessImplicitDefsPass(Registry); + initializePEIPass(Registry); + initializeRALinScanPass(Registry); + initializeRegisterCoalescerAnalysisGroup(Registry); + initializeRenderMachineFunctionPass(Registry); + initializeSimpleRegisterCoalescingPass(Registry); + initializeSlotIndexesPass(Registry); + initializeLoopSplitterPass(Registry); + initializeStackProtectorPass(Registry); + initializeStackSlotColoringPass(Registry); + initializeStrongPHIEliminationPass(Registry); + initializeTwoAddressInstructionPassPass(Registry); + initializeUnreachableBlockElimPass(Registry); + initializeUnreachableMachineBlockElimPass(Registry); + initializeVirtRegMapPass(Registry); + initializeLowerIntrinsicsPass(Registry); +} + +void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { + initializeCodeGen(*unwrap(R)); +} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 335d2d8e9bac..f79598de1d9e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -130,21 +130,25 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); - // Any register which was defined within the previous scheduling region - // may have been rescheduled and its lifetime may overlap with registers - // in ways not reflected in our current liveness state. For each such - // register, adjust the liveness state to be conservatively correct. - for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) - if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { - assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); - - // Mark this register to be non-renamable. + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] != ~0u) { + // If Reg is currently live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = Count; + } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); // Move the def index to the end of the previous region, to reflect // that the def could theoretically have been scheduled at the end. 
DefIndices[Reg] = InsertPosIndex; } + } PrescanInstruction(MI); ScanInstruction(MI, Count); @@ -177,7 +181,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // that have special allocation requirements. Also assume all registers // used in a call must not be changed (ABI). // FIXME: The issue with predicated instruction is more complex. We are being - // conservatively here because the kill markers cannot be trusted after + // conservative here because the kill markers cannot be trusted after // if-conversion: // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] // ... @@ -321,8 +325,62 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, } } +// Check all machine operands that reference the antidependent register and must +// be replaced by NewReg. Return true if any of their parent instructions may +// clobber the new register. +// +// Note: AntiDepReg may be referenced by a two-address instruction such that +// it's use operand is tied to a def operand. We guard against the case in which +// the two-address instruction also defines NewReg, as may happen with +// pre/postincrement loads. In this case, both the use and def operands are in +// RegRefs because the def is inserted by PrescanInstruction and not erased +// during ScanInstruction. So checking for an instructions with definitions of +// both NewReg and AntiDepReg covers it. +bool +CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg) +{ + for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) { + MachineOperand *RefOper = I->second; + + // Don't allow the instruction defining AntiDepReg to earlyclobber its + // operands, in case they may be assigned to NewReg. In this case antidep + // breaking must fail, but it's too rare to bother optimizing. + if (RefOper->isDef() && RefOper->isEarlyClobber()) + return true; + + // Handle cases in which this instructions defines NewReg. + MachineInstr *MI = RefOper->getParent(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &CheckOper = MI->getOperand(i); + + if (!CheckOper.isReg() || !CheckOper.isDef() || + CheckOper.getReg() != NewReg) + continue; + + // Don't allow the instruction to define NewReg and AntiDepReg. + // When AntiDepReg is renamed it will be an illegal op. + if (RefOper->isDef()) + return true; + + // Don't allow an instruction using AntiDepReg to be earlyclobbered by + // NewReg + if (CheckOper.isEarlyClobber()) + return true; + + // Don't allow inline asm to define NewReg at all. Who know what it's + // doing with it. + if (MI->isInlineAsm()) + return true; + } + } + return false; +} + unsigned -CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, +CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC) @@ -338,10 +396,10 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, // an anti-dependence with this AntiDepReg, because that would // re-introduce that anti-dependence. if (NewReg == LastNewReg) continue; - // If the instruction already has a def of the NewReg, it's not suitable. - // For example, Instruction with multiple definitions can result in this - // condition. - if (MI->modifiesRegister(NewReg, TRI)) continue; + // If any instructions that define AntiDepReg also define the NewReg, it's + // not suitable. 
For example, Instruction with multiple definitions can + // result in this condition. + if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue; // If NewReg is dead and NewReg's most recent def is not before // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) @@ -548,7 +606,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { - if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg, + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, + AntiDepReg, LastNewReg[AntiDepReg], RC)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " @@ -558,9 +620,6 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // Update the references to the old register to refer to the new // register. - std::pair<std::multimap<unsigned, MachineOperand *>::iterator, - std::multimap<unsigned, MachineOperand *>::iterator> - Range = RegRefs.equal_range(AntiDepReg); for (std::multimap<unsigned, MachineOperand *>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); @@ -580,7 +639,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, } // We just went back in time and modified history; the - // liveness information for the anti-depenence reg is now + // liveness information for the anti-dependence reg is now // inconsistent. Set the state as if it were dead. Classes[NewReg] = Classes[AntiDepReg]; DefIndices[NewReg] = DefIndices[AntiDepReg]; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 0ed7c35b0f0c..0daaef273448 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -48,8 +48,10 @@ class TargetRegisterInfo; /// pointer. std::vector<const TargetRegisterClass*> Classes; - /// RegRegs - Map registers to all their references within a live range. + /// RegRefs - Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; + typedef std::multimap<unsigned, MachineOperand *>::const_iterator + RegRefIter; /// KillIndices - The index of the most recent kill (proceding bottom-up), /// or ~0u if the register is not live. 
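The RegRefs/RegRefIter plumbing above lets findSuitableFreeRegister look at every recorded reference of the anti-dependent register before committing to a rename. Stripped of the MachineInstr/MachineOperand types, the safety test reduces to roughly the following sketch, where the struct fields are simplified stand-ins for the operand queries made above:

    #include <vector>

    // One recorded reference of the anti-dependent register, reduced to the few
    // facts the check needs. Simplified stand-in for a MachineOperand reference.
    struct RegRef {
      bool RefIsDef;               // this reference defines AntiDepReg
      bool RefIsEarlyClobber;      // ...and is marked earlyclobber
      bool InstrDefinesNewReg;     // the same instruction also defines NewReg
      bool NewRegDefIsEarlyClobber;
      bool InstrIsInlineAsm;
    };

    // Conservative check in the spirit of isNewRegClobberedByRefs above: return
    // true if replacing AntiDepReg by NewReg in these references is unsafe.
    static bool renameIsUnsafe(const std::vector<RegRef> &Refs) {
      for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
        const RegRef &R = Refs[i];
        if (R.RefIsDef && R.RefIsEarlyClobber)
          return true;                 // earlyclobber def of the old register
        if (!R.InstrDefinesNewReg)
          continue;
        if (R.RefIsDef)                // instruction would define NewReg twice
          return true;
        if (R.NewRegDefIsEarlyClobber) // NewReg def clobbers the renamed use
          return true;
        if (R.InstrIsInlineAsm)        // inline asm: assume the worst
          return true;
      }
      return false;
    }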
@@ -90,10 +92,14 @@ class TargetRegisterInfo; private: void PrescanInstruction(MachineInstr *MI); void ScanInstruction(MachineInstr *MI, unsigned Count); - unsigned findSuitableFreeRegister(MachineInstr *MI, + bool isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg); + unsigned findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, - const TargetRegisterClass *); + const TargetRegisterClass *RC); }; } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 318d922adebf..fdc1d9142140 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -36,7 +36,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - DeadMachineInstructionElim() : MachineFunctionPass(ID) {} + DeadMachineInstructionElim() : MachineFunctionPass(ID) { + initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry()); + } private: bool isDead(const MachineInstr *MI) const; @@ -45,13 +47,19 @@ namespace { char DeadMachineInstructionElim::ID = 0; INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", - "Remove dead machine instructions", false, false); + "Remove dead machine instructions", false, false) FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); } bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI->isInlineAsm()) + return false; + // Don't delete instructions with side effects. bool SawStore = false; if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) @@ -151,7 +159,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.reset(Reg); // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after @@ -168,7 +176,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned Reg = MO.getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.set(Reg); for (const unsigned *AliasSet = TRI->getAliasSet(Reg); *AliasSet; ++AliasSet) diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 550fd3e25fb7..0ebb5b0db70e 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -43,7 +43,7 @@ namespace { // The eh.selector intrinsic. Function *SelectorIntrinsic; - // _Unwind_Resume_or_Rethrow call. + // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call. Constant *URoR; // The EH language-specific catch-all type. @@ -82,11 +82,11 @@ namespace { /// FindAllURoRInvokes - Find all URoR invokes in the function. void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes); - /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" - /// calls. The "unwind" part of these invokes jump to a landing pad within - /// the current function. 
This is a candidate to merge the selector - /// associated with the URoR invoke with the one from the URoR's landing - /// pad. + /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or + /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to + /// a landing pad within the current function. This is a candidate to merge + /// the selector associated with the URoR invoke with the one from the + /// URoR's landing pad. bool HandleURoRInvokes(); /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated @@ -100,7 +100,9 @@ namespace { DwarfEHPrepare(const TargetMachine *tm) : FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), - URoR(0), EHCatchAllValue(0), RewindFunction(0) {} + URoR(0), EHCatchAllValue(0), RewindFunction(0) { + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &Fn); @@ -224,10 +226,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, return Changed; } -/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The -/// "unwind" part of these invokes jump to a landing pad within the current -/// function. This is a candidate to merge the selector associated with the URoR -/// invoke with the one from the URoR's landing pad. +/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or +/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a +/// landing pad within the current function. This is a candidate to merge the +/// selector associated with the URoR invoke with the one from the URoR's +/// landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { if (!EHCatchAllValue) { EHCatchAllValue = @@ -247,7 +250,10 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(CatchAllSels); + if (!URoR) { + URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume"); + if (!URoR) return CleanupSelectors(CatchAllSels); + } } SmallPtrSet<InvokeInst*, 32> URoRInvokes; diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index fb884c9e8b71..e08feeb27539 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -23,7 +23,7 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Support/ELF.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class GlobalValue; diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index d14728d8a36c..0fd1e8e83bd7 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -45,6 +45,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetLowering.h" @@ -64,7 +65,7 @@ char ELFWriter::ID = 0; ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(ID), O(o), TM(tm), - OutContext(*new MCContext(*TM.getMCAsmInfo())), + OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))), TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), isLittleEndian(TM.getTargetData()->isLittleEndian()), @@ -327,6 +328,18 @@ void ELFWriter::AddToSymbolList(ELFSym *GblSym) { } } +/// HasCommonSymbols - True if this section holds common symbols, this is +/// 
indicated on the ELF object file by a symbol with SHN_COMMON section +/// header index. +static bool HasCommonSymbols(const MCSectionELF &S) { + // FIXME: this is wrong, a common symbol can be in .data for example. + if (StringRef(S.getSectionName()).startswith(".gnu.linkonce.")) + return true; + + return false; +} + + // EmitGlobal - Choose the right section for global and emit it void ELFWriter::EmitGlobal(const GlobalValue *GV) { @@ -363,7 +376,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) { unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); GblSym->Size = Size; - if (S->HasCommonSymbols()) { // Symbol must go to a common section + if (HasCommonSymbols(*S)) { // Symbol must go to a common section GblSym->SectionIdx = ELF::SHN_COMMON; // A new linkonce section is created for each global in the diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp new file mode 100644 index 000000000000..aed8bc947991 --- /dev/null +++ b/lib/CodeGen/EdgeBundles.cpp @@ -0,0 +1,86 @@ +//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the EdgeBundles analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +static cl::opt<bool> +ViewEdgeBundles("view-edge-bundles", cl::Hidden, + cl::desc("Pop up a window to show edge bundle graphs")); + +char EdgeBundles::ID = 0; + +INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges", + /* cfg = */true, /* analysis = */ true) + +char &llvm::EdgeBundlesID = EdgeBundles::ID; + +void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + EC.clear(); + EC.grow(2 * MF->size()); + + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + const MachineBasicBlock &MBB = *I; + unsigned OutE = 2 * MBB.getNumber() + 1; + // Join the outgoing bundle with the ingoing bundles of all successors. + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + EC.join(OutE, 2 * (*SI)->getNumber()); + } + EC.compress(); + if (ViewEdgeBundles) + view(); + return false; +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. +void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} + +/// Specialize WriteGraph, the standard implementation won't work. 
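With the numbering used by EdgeBundles above, each basic block BB#n owns two equivalence-class leaders, 2n for the bundle of its incoming edges and 2n+1 for the bundle of its outgoing edges, and the pass simply unions every outgoing bundle with the incoming bundles of all successors. A small worked example on a hypothetical diamond CFG, not taken from this commit:

    // CFG:  BB#0 -> BB#1, BB#0 -> BB#2, BB#1 -> BB#3, BB#2 -> BB#3
    //
    // Edge-end ids:  in(BB#n) = 2n, out(BB#n) = 2n+1
    //   BB#0: in 0, out 1     BB#1: in 2, out 3
    //   BB#2: in 4, out 5     BB#3: in 6, out 7
    //
    // Joins performed:  1~2, 1~4   (out of BB#0 with in of BB#1 and BB#2)
    //                   3~6, 5~6   (out of BB#1 and BB#2 with in of BB#3)
    //
    // Bundles after EC.compress():
    //   {1, 2, 4}  - the edges leaving BB#0
    //   {3, 5, 6}  - the edges entering BB#3
    //   {0} and {7} remain singletons (the function entry and exit ends).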
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const std::string &Title) { + const MachineFunction *MF = G.getMachineFunction(); + + O << "digraph {\n"; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + unsigned BB = I->getNumber(); + O << "\t\"BB#" << BB << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" + << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); SI != SE; ++SI) + O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() + << "\" [ color=lightgray ]\n"; + } + O << "}\n"; + return O; +} + + diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp new file mode 100644 index 000000000000..b5ec303f5d93 --- /dev/null +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -0,0 +1,82 @@ +//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Expand Psuedo-instructions produced by ISel. These are usually to allow +// the expansion to contain control flow, such as a conditional move +// implemented with a conditional branch and a phi, or an atomic operation +// implemented with a loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "expand-isel-pseudos" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +namespace { + class ExpandISelPseudos : public MachineFunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandISelPseudos() : MachineFunctionPass(ID) {} + + private: + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "Expand ISel Pseudo-instructions"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} // end anonymous namespace + +char ExpandISelPseudos::ID = 0; +INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", + "Expand CodeGen Pseudo-instructions", false, false) + +FunctionPass *llvm::createExpandISelPseudosPass() { + return new ExpandISelPseudos(); +} + +bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetLowering *TLI = MF.getTarget().getTargetLowering(); + + // Iterate through each instruction in the function, looking for pseudos. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE; ) { + MachineInstr *MI = MBBI++; + + // If MI is a pseudo, expand it. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.usesCustomInsertionHook()) { + Changed = true; + MachineBasicBlock *NewMBB = + TLI->EmitInstrWithCustomInserter(MI, MBB); + // The expansion may involve new basic blocks. 
+ if (NewMBB != MBB) { + MBB = NewMBB; + I = NewMBB; + MBBI = NewMBB->begin(); + MBBE = NewMBB->end(); + } + } + } + } + + return Changed; +} diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 0f6e882a7be4..d757cf409d50 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -30,7 +30,6 @@ namespace { raw_ostream &OS; public: - Printer() : FunctionPass(ID), OS(errs()) {} explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} @@ -56,7 +55,7 @@ namespace { } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", - "Create Garbage Collector Module Metadata", false, false); + "Create Garbage Collector Module Metadata", false, false) // ----------------------------------------------------------------------------- @@ -70,7 +69,9 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; GCModuleInfo::GCModuleInfo() - : ImmutablePass(ID) {} + : ImmutablePass(ID) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); +} GCModuleInfo::~GCModuleInfo() { clear(); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 719fa194d8da..766c6ee542a9 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -19,11 +19,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -123,6 +124,11 @@ GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { // ----------------------------------------------------------------------------- +INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", + false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) + FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } @@ -130,7 +136,9 @@ FunctionPass *llvm::createGCLoweringPass() { char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() - : FunctionPass(ID) {} + : FunctionPass(ID) { + initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); + } const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; @@ -139,6 +147,7 @@ const char *LowerIntrinsics::getPassName() const { void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { FunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); + AU.addPreserved<DominatorTree>(); } /// doInitialization - If this module uses the GC intrinsics, find them now. @@ -249,9 +258,16 @@ bool LowerIntrinsics::runOnFunction(Function &F) { if (NeedsDefaultLoweringPass(S)) MadeChange |= PerformDefaultLowering(F, S); - if (NeedsCustomLoweringPass(S)) + bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); + if (UseCustomLoweringPass) MadeChange |= S.performCustomLowering(F); - + + // Custom lowering may modify the CFG, so dominators must be recomputed. 
+ if (UseCustomLoweringPass) { + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) + DT->DT->recalculate(F); + } + return MadeChange; } @@ -345,13 +361,15 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { MachineBasicBlock::iterator RAI = CI; ++RAI; - if (FI->getStrategy().needsSafePoint(GC::PreCall)) - FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI, - CI->getDebugLoc())); + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { + MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); + FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); + } - if (FI->getStrategy().needsSafePoint(GC::PostCall)) - FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI, - CI->getDebugLoc())); + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { + MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); + } } void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { @@ -364,12 +382,12 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { } void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - assert(TRI && "TargetRegisterInfo not available!"); + const TargetFrameLowering *TFI = TM->getFrameLowering(); + assert(TFI && "TargetRegisterInfo not available!"); for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), RE = FI->roots_end(); RI != RE; ++RI) - RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num); + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); } bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 0ea30d7a7929..db53b0473a9a 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -17,7 +17,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -26,6 +28,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -91,6 +94,8 @@ namespace { /// ClobbersPred - True if BB could modify predicates (e.g. has /// cmp, call, etc.) /// NonPredSize - Number of non-predicated instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. /// TrueBB / FalseBB- See AnalyzeBranch(). /// BrCond - Conditions for end of block conditional branches. 
@@ -106,6 +111,8 @@ namespace { bool CannotBeCopied : 1; bool ClobbersPred : 1; unsigned NonPredSize; + unsigned ExtraCost; + unsigned ExtraCost2; MachineBasicBlock *BB; MachineBasicBlock *TrueBB; MachineBasicBlock *FalseBB; @@ -115,7 +122,7 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -150,20 +157,31 @@ namespace { const TargetLowering *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + const MachineLoopInfo *MLI; bool MadeChange; int FnNum; public: static char ID; - IfConverter() : MachineFunctionPass(ID), FnNum(-1) {} + IfConverter() : MachineFunctionPass(ID), FnNum(-1) { + initializeIfConverterPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "If Converter"; } private: bool ReverseBranchCondition(BBInfo &BBI); - bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, - bool FalseBranch, unsigned &Dups) const; + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -188,14 +206,21 @@ namespace { bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const { - return Size > 0 && TII->isProfitableToIfCvt(BB, Size); + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, + float Prediction, float Confidence) const { + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction, Confidence); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, - MachineBasicBlock &FBB, unsigned FSize) const { - return TSize > 0 && FSize > 0 && - TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize); + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, + float Prediction, float Confidence) const { + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, + Prediction, Confidence); } // blockAlwaysFallThrough - Block ends without a terminator. 
@@ -230,7 +255,9 @@ namespace { char IfConverter::ID = 0; } -INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false); +INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } @@ -238,6 +265,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); + InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; // Tail merge tend to expose more if-conversion opportunities. @@ -431,7 +460,8 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// predecessor) forms a valid simple shape for ifcvt. It also returns the /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. -bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -441,7 +471,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || - !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, + Prediction, Confidence)) return false; Dups = TrueBBI.NonPredSize; } @@ -456,7 +487,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { /// returns the number of instructions that the ifcvt would need to duplicate /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, - bool FalseBranch, unsigned &Dups) const { + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -478,7 +510,8 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, + Prediction, Confidence)) return false; Dups = Size; } @@ -493,18 +526,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, return TExit && TExit == FalseBBI.BB; } -static -MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, - const TargetInstrInfo *TII) { - MachineBasicBlock::iterator I = BB->end(); - while (I != BB->begin()) { - --I; - if (!I->getDesc().isBranch()) - break; - } - return I; -} - /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -533,64 +554,70 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) return false; - MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + // Count duplicate instructions at the beginning of the true and false blocks. 
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); - // Skip dbg_value instructions - while (TI != TIE && TI->isDebugValue()) - ++TI; - while (FI != FIE && FI->isDebugValue()) - ++FI; - while (TI != TIE && FI != FIE) { + while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TI->isDebugValue()) { - while (TI != TIE && TI->isDebugValue()) - ++TI; - if (TI == TIE) + if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) break; } - if (FI->isDebugValue()) { - while (FI != FIE && FI->isDebugValue()) - ++FI; - if (FI == FIE) + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) break; } - if (!TI->isIdenticalTo(FI)) + if (!TIB->isIdenticalTo(FIB)) break; ++Dups1; - ++TI; - ++FI; + ++TIB; + ++FIB; } - TI = firstNonBranchInst(TrueBBI.BB, TII); - FI = firstNonBranchInst(FalseBBI.BB, TII); - MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); - // Skip dbg_value instructions at end of the bb's. - while (TI != TIB && TI->isDebugValue()) - --TI; - while (FI != FIB && FI->isDebugValue()) - --FI; - while (TI != TIB && FI != FIB) { + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. + while (TIE != TIB) { + --TIE; + if (!TIE->getDesc().isBranch()) + break; + } + while (FIE != FIB) { + --FIE; + if (!FIE->getDesc().isBranch()) + break; + } + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { // Skip dbg_value instructions. These do not count. - if (TI->isDebugValue()) { - while (TI != TIB && TI->isDebugValue()) - --TI; - if (TI == TIB) + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) break; } - if (FI->isDebugValue()) { - while (FI != FIB && FI->isDebugValue()) - --FI; - if (FI == FIB) + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) break; } - if (!TI->isIdenticalTo(FI)) + if (!TIE->isIdenticalTo(FIE)) break; ++Dups2; - --TI; - --FI; + --TIE; + --FIE; } return true; @@ -627,6 +654,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // Then scan all the instructions. BBI.NonPredSize = 0; + BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { @@ -641,9 +670,15 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); if (!isCondBr) { - if (!isPredicated) + if (!isPredicated) { BBI.NonPredSize++; - else if (!AlreadyPredicated) { + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { // FIXME: This instruction is already predicated before the // if-conversion pass. It's probably something like a conditional move. // Mark this block unpredicable for now. 
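The two counters added above separate instruction count from latency: each non-branch, unpredicated instruction bumps NonPredSize by one, ExtraCost absorbs any cycles beyond the first reported by getInstrLatency, and ExtraCost2 accumulates the per-instruction predication penalty. A hypothetical four-instruction block (instruction names and latencies invented for illustration) makes the bookkeeping concrete:

    // Hypothetical block of four instructions (latencies from the itinerary):
    //   add  - 1 cycle,  no predication penalty
    //   mul  - 3 cycles, no predication penalty
    //   ldr  - 1 cycle,  predication penalty of 1
    //   mov  - 1 cycle,  no predication penalty
    //
    // After ScanInstructions:
    //   NonPredSize = 4     (one per instruction)
    //   ExtraCost   = 2     (mul contributes 3 - 1)
    //   ExtraCost2  = 1     (ldr's extra cost when predicated)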
@@ -765,9 +800,35 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool TNeedSub = TrueBBI.Predicate.size() > 0; bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; + + // Try to predict the branch, using loop info to guide us. + // General heuristics are: + // - backedge -> 90% taken + // - early exit -> 20% taken + // - branch predictor confidence -> 90% + float Prediction = 0.5f; + float Confidence = 0.9f; + MachineLoop *Loop = MLI->getLoopFor(BB); + if (Loop) { + if (TrueBBI.BB == Loop->getHeader()) + Prediction = 0.9f; + else if (FalseBBI.BB == Loop->getHeader()) + Prediction = 0.1f; + + MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); + MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); + if (!TrueLoop || TrueLoop->getParentLoop() == Loop) + Prediction = 0.2f; + else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) + Prediction = 0.8f; + } + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), - *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, + Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -783,8 +844,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -797,15 +859,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(TrueBBI, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -820,22 +884,30 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... 
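The static prediction above treats the edge into a loop header as a hot backedge and an edge that appears to leave the loop as a cold early exit, always reported with a fixed 0.9 confidence. Restated as a standalone helper, with booleans standing in for the MachineLoopInfo queries, the heuristic is roughly:

    // Schematic restatement of the taken-probability heuristic above; the
    // booleans approximate the loop-structure queries made in AnalyzeBlock and
    // are not the exact conditions used there.
    static float predictTakenProbability(bool inLoop,
                                         bool trueSuccIsLoopHeader,
                                         bool falseSuccIsLoopHeader,
                                         bool trueSuccLooksLikeExit,
                                         bool falseSuccLooksLikeExit) {
      float Prediction = 0.5f;                         // no information
      if (inLoop) {
        if (trueSuccIsLoopHeader)        Prediction = 0.9f;  // backedge: taken
        else if (falseSuccIsLoopHeader)  Prediction = 0.1f;

        if (trueSuccLooksLikeExit)       Prediction = 0.2f;  // early exit: cold
        else if (falseSuccLooksLikeExit) Prediction = 0.8f;
      }
      return Prediction;  // the caller above pairs this with a 0.9f confidence
    }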
- if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, + 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } - if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, + 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(FalseBBI, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1365,6 +1437,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { @@ -1438,7 +1515,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; + ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; FromBBI.NonPredSize = 0; + FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.HasFallThrough = FromBBI.HasFallThrough; diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index b965bfdcf3b8..a1bd972d38e2 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -12,28 +12,34 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spiller" +#define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "SplitKit.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<bool> +VerifySpills("verify-spills", cl::desc("Verify after each spill/split")); + namespace { class InlineSpiller : public Spiller { MachineFunctionPass &pass_; MachineFunction &mf_; LiveIntervals &lis_; - MachineLoopInfo &loops_; + LiveStacks &lss_; + AliasAnalysis *aa_; 
VirtRegMap &vrm_; MachineFrameInfo &mfi_; MachineRegisterInfo &mri_; @@ -41,19 +47,12 @@ class InlineSpiller : public Spiller { const TargetRegisterInfo &tri_; const BitVector reserved_; - SplitAnalysis splitAnalysis_; - // Variables that are valid during spill(), but used by multiple methods. - LiveInterval *li_; - SmallVectorImpl<LiveInterval*> *newIntervals_; + LiveRangeEdit *edit_; const TargetRegisterClass *rc_; int stackSlot_; - const SmallVectorImpl<LiveInterval*> *spillIs_; - // Values of the current interval that can potentially remat. - SmallPtrSet<VNInfo*, 8> reMattable_; - - // Values in reMattable_ that failed to remat at some point. + // Values that failed to remat at some point. SmallPtrSet<VNInfo*, 8> usedValues_; ~InlineSpiller() {} @@ -65,30 +64,29 @@ public: : pass_(pass), mf_(mf), lis_(pass.getAnalysis<LiveIntervals>()), - loops_(pass.getAnalysis<MachineLoopInfo>()), + lss_(pass.getAnalysis<LiveStacks>()), + aa_(&pass.getAnalysis<AliasAnalysis>()), vrm_(vrm), mfi_(*mf.getFrameInfo()), mri_(mf.getRegInfo()), tii_(*mf.getTarget().getInstrInfo()), tri_(*mf.getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)), - splitAnalysis_(mf, lis_, loops_) {} + reserved_(tri_.getReservedRegs(mf_)) {} void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs); + const SmallVectorImpl<LiveInterval*> &spillIs); -private: - bool split(); + void spill(LiveRangeEdit &); - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx); +private: bool reMaterializeFor(MachineBasicBlock::iterator MI); void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI); bool foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops); + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI = 0); void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); }; @@ -98,106 +96,41 @@ namespace llvm { Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { + if (VerifySpills) + mf.verify(&pass, "When creating inline spiller"); return new InlineSpiller(pass, mf, vrm); } } -/// split - try splitting the current interval into pieces that may allocate -/// separately. Return true if successful. -bool InlineSpiller::split() { - splitAnalysis_.analyze(li_); - - if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) { - // We can split, but li_ may be left intact with fewer uses. - if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) - .splitAroundLoop(loop)) - return true; - } - - // Try splitting into single block intervals. - SplitAnalysis::BlockPtrSet blocks; - if (splitAnalysis_.getMultiUseBlocks(blocks)) { - if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) - .splitSingleBlocks(blocks)) - return true; - } - - // Try splitting inside a basic block. - if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) { - if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) - .splitInsideBlock(MBB)) - return true; - } - - // We may have been able to split out some uses, but the original interval is - // intact, and it should still be spilled. - return false; -} - -/// allUsesAvailableAt - Return true if all registers used by OrigMI at -/// OrigIdx are also available with the same value at UseIdx. 
-bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI, - SlotIndex OrigIdx, - SlotIndex UseIdx) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); - for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg) - continue; - // Reserved registers are OK. - if (MO.isUndef() || !lis_.hasInterval(MO.getReg())) - continue; - // We don't want to move any defs. - if (MO.isDef()) - return false; - // We cannot depend on virtual registers in spillIs_. They will be spilled. - for (unsigned si = 0, se = spillIs_->size(); si != se; ++si) - if ((*spillIs_)[si]->reg == MO.getReg()) - return false; - - LiveInterval &LI = lis_.getInterval(MO.getReg()); - const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx); - if (!OVNI) - continue; - if (OVNI != LI.getVNInfoAt(UseIdx)) - return false; - } - return true; -} - -/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of +/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of /// reloading it. bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); - VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx); + VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); + if (!OrigVNI) { DEBUG(dbgs() << "\tadding <undef> flags: "); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) + if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) MO.setIsUndef(); } DEBUG(dbgs() << UseIdx << '\t' << *MI); return true; } - if (!reMattable_.count(OrigVNI)) { - DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": " - << UseIdx << '\t' << *MI); - return false; - } - MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def); - if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) { + + LiveRangeEdit::Remat RM(OrigVNI); + if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) { usedValues_.insert(OrigVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; } - // If the instruction also writes li_->reg, it had better not require the same - // register for uses and defs. + // If the instruction also writes edit_->getReg(), it had better not require + // the same register for uses and defs. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops); if (Writes) { for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); @@ -209,62 +142,57 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { } } + // Before rematerializing into a register for a single instruction, try to + // fold a load into the instruction. That avoids allocating a new register. + if (RM.OrigMI->getDesc().canFoldAsLoad() && + foldMemoryOperand(MI, Ops, RM.OrigMI)) { + edit_->markRematerialized(RM.ParentVNI); + return true; + } + // Alocate a new register for the remat. - unsigned NewVReg = mri_.createVirtualRegister(rc_); - vrm_.grow(); - LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_); NewLI.markNotSpillable(); - newIntervals_->push_back(&NewLI); + + // Rematting for a copy: Set allocation hint to be the destination register. 
+ if (MI->isCopy()) + mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg()); // Finally we can rematerialize OrigMI before MI. - MachineBasicBlock &MBB = *MI->getParent(); - tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_); - MachineBasicBlock::iterator RematMI = MI; - SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex(); - DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI); + SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + lis_, tii_, tri_); + DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' + << *lis_.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) { - MO.setReg(NewVReg); + if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) { + MO.setReg(NewLI.reg); MO.setIsKill(); } } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true, - lis_.getVNInfoAllocator()); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); return true; } -/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible, +/// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { // Do a quick scan of the interval values to find if any are remattable. - reMattable_.clear(); - usedValues_.clear(); - for (LiveInterval::const_vni_iterator I = li_->vni_begin(), - E = li_->vni_end(); I != E; ++I) { - VNInfo *VNI = *I; - if (VNI->isUnused() || !VNI->isDefAccurate()) - continue; - MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); - if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI)) - continue; - reMattable_.insert(VNI); - } - - // Often, no defs are remattable. - if (reMattable_.empty()) + if (!edit_->anyRematerializable(lis_, tii_, aa_)) return; - // Try to remat before all uses of li_->reg. + usedValues_.clear(); + + // Try to remat before all uses of edit_->getReg(). bool anyRemat = false; for (MachineRegisterInfo::use_nodbg_iterator - RI = mri_.use_nodbg_begin(li_->reg); + RI = mri_.use_nodbg_begin(edit_->getReg()); MachineInstr *MI = RI.skipInstruction();) anyRemat |= reMaterializeFor(MI); @@ -273,33 +201,35 @@ void InlineSpiller::reMaterializeAll() { // Remove any values that were completely rematted. bool anyRemoved = false; - for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(), - E = reMattable_.end(); I != E; ++I) { + for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(), + E = edit_->getParent().vni_end(); I != E; ++I) { VNInfo *VNI = *I; - if (VNI->hasPHIKill() || usedValues_.count(VNI)) + if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) || + usedValues_.count(VNI)) continue; MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); lis_.RemoveMachineInstrFromMaps(DefMI); vrm_.RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); - VNI->setIsDefAccurate(false); + VNI->def = SlotIndex(); anyRemoved = true; } if (!anyRemoved) return; - // Removing values may cause debug uses where li_ is not live. - for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg); + // Removing values may cause debug uses where parent is not live. 
+ for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg()); MachineInstr *MI = RI.skipInstruction();) { if (!MI->isDebugValue()) continue; - // Try to preserve the debug value if li_ is live immediately after it. + // Try to preserve the debug value if parent is live immediately after it. MachineBasicBlock::iterator NextMI = MI; ++NextMI; if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI)); + SlotIndex Idx = lis_.getInstructionIndex(NextMI); + VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) continue; } @@ -317,7 +247,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { return false; // We have a stack access. Is it the right register and slot? - if (reg != li_->reg || FI != stackSlot_) + if (reg != edit_->getReg() || FI != stackSlot_) return false; DEBUG(dbgs() << "Coalescing stack access: " << *MI); @@ -327,9 +257,13 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { } /// foldMemoryOperand - Try folding stack slot references in Ops into MI. -/// Return true on success, and MI will be erased. +/// @param MI Instruction using or defining the current register. +/// @param Ops Operand indices from readsWritesVirtualRegister(). +/// @param LoadMI Load instruction to use instead of stack slot when non-null. +/// @return True on success, and MI will be erased. bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops) { + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI) { // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; @@ -341,16 +275,22 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; + // We cannot fold a load instruction into a def. + if (LoadMI && MO.isDef()) + return false; // Tied use operands should not be passed to foldMemoryOperand. if (!MI->isRegTiedToDefOperand(Idx)) FoldOps.push_back(Idx); } - MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + MachineInstr *FoldMI = + LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI) + : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); if (!FoldMI) return false; lis_.ReplaceMachineInstrInMaps(MI, FoldMI); - vrm_.addSpillSlotUse(stackSlot_, FoldMI); + if (!LoadMI) + vrm_.addSpillSlotUse(stackSlot_, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); return true; @@ -366,7 +306,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); vrm_.addSpillSlotUse(stackSlot_, MI); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true, + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, lis_.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); } @@ -375,44 +315,58 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, void InlineSpiller::insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); + + // Get the defined value. It could be an early clobber so keep the def index. 
SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); + assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); + Idx = VNI->def; + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); --MI; // Point to store instruction. SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); vrm_.addSpillSlotUse(stackSlot_, MI); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true, - lis_.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); } void InlineSpiller::spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) { - DEBUG(dbgs() << "Inline spilling " << *li << "\n"); - assert(li->isSpillable() && "Attempting to spill already spilled value."); - assert(!li->isStackSlot() && "Trying to spill a stack slot."); - - li_ = li; - newIntervals_ = &newIntervals; - rc_ = mri_.getRegClass(li->reg); - spillIs_ = &spillIs; + const SmallVectorImpl<LiveInterval*> &spillIs) { + LiveRangeEdit edit(*li, newIntervals, spillIs); + spill(edit); + if (VerifySpills) + mf_.verify(&pass_, "After inline spill"); +} - if (split()) - return; +void InlineSpiller::spill(LiveRangeEdit &edit) { + edit_ = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + DEBUG(dbgs() << "Inline spilling " + << mri_.getRegClass(edit.getReg())->getName() + << ':' << edit.getParent() << "\n"); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); reMaterializeAll(); // Remat may handle everything. - if (li_->empty()) + if (edit_->getParent().empty()) return; - stackSlot_ = vrm_.getStackSlot(li->reg); - if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) - stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + rc_ = mri_.getRegClass(edit.getReg()); + stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg()); + + // Update LiveStacks now that we are committed to spilling. + LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); + assert(stacklvr.empty() && "Just created stack slot not empty"); + stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); + stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); // Iterate over instructions using register. - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg()); MachineInstr *MI = RI.skipInstruction();) { // Debug values are not allowed to affect codegen. @@ -440,7 +394,7 @@ void InlineSpiller::spill(LiveInterval *li, // Analyze instruction. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops); // Attempt to fold memory ops. if (foldMemoryOperand(MI, Ops)) @@ -448,9 +402,7 @@ void InlineSpiller::spill(LiveInterval *li, // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. 
- unsigned NewVReg = mri_.createVirtualRegister(rc_); - vrm_.grow(); - LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + LiveInterval &NewLI = edit.create(mri_, lis_, vrm_); NewLI.markNotSpillable(); if (Reads) @@ -460,7 +412,7 @@ void InlineSpiller::spill(LiveInterval *li, bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - MO.setReg(NewVReg); + MO.setReg(NewLI.reg); if (MO.isUse()) { if (!MI->isRegTiedToDefOperand(Ops[i])) MO.setIsKill(); @@ -475,6 +427,5 @@ void InlineSpiller::spill(LiveInterval *li, insertSpill(NewLI, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); - newIntervals.push_back(&NewLI); } } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 3852ebaf6425..3861ddadf655 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -85,9 +85,11 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, } // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif void IntrinsicLowering::AddPrototypes(Module &M) { @@ -536,3 +538,27 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { "Lowering should have eliminated any uses of the intrinsic call!"); CI->eraseFromParent(); } + +bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { + // Verify this is a simple bswap. + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || + !CI->getType()->isIntegerTy()) + return false; + + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty) + return false; + + // Okay, we can do this xform, do so now. + const Type *Tys[] = { Ty }; + Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + + Value *Op = CI->getArgOperand(0); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 36038027b259..80dfc763af69 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -20,9 +20,11 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" @@ -30,6 +32,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/StandardPasses.h" using namespace llvm; namespace llvm { @@ -140,13 +143,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // Create a code emitter if asked to show the encoding. 
MCCodeEmitter *MCE = 0; - if (ShowMCEncoding) + TargetAsmBackend *TAB = 0; + if (ShowMCEncoding) { MCE = getTarget().createCodeEmitter(*this, *Context); - - AsmStreamer.reset(createAsmStreamer(*Context, Out, - getTargetData()->isLittleEndian(), - getVerboseAsm(), InstPrinter, - MCE, ShowMCInst)); + TAB = getTarget().createAsmBackend(TargetTriple); + } + + MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, + getVerboseAsm(), + hasMCUseLoc(), + InstPrinter, + MCE, TAB, + ShowMCInst); + AsmStreamer.reset(S); break; } case CGFT_ObjectFile: { @@ -159,7 +168,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, *TAB, Out, MCE, - hasMCRelaxAll())); + hasMCRelaxAll(), + hasMCNoExecStack())); + AsmStreamer.get()->InitSections(); break; } case CGFT_Null: @@ -241,7 +252,7 @@ static void printAndVerify(PassManagerBase &PM, PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM.add(createMachineVerifierPass()); + PM.add(createMachineVerifierPass(Banner)); } /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both @@ -253,6 +264,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, MCContext *&OutContext) { // Standard LLVM-Level Passes. + // Basic AliasAnalysis support. + createStandardAliasAnalysisPasses(&PM); + // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) @@ -288,7 +302,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // edge from elsewhere. PM.add(createSjLjEHPass(getTargetLowering())); // FALLTHROUGH - case ExceptionHandling::Dwarf: + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::DwarfTable: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: @@ -320,7 +335,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo()); + TargetAsmInfo *TAI = new TargetAsmInfo(*this); + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI); PM.add(MMI); OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. @@ -339,6 +355,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Print the instruction selected machine code... printAndVerify(PM, "After Instruction Selection"); + // Expand pseudo-instructions emitted by ISel. + PM.add(createExpandISelPseudosPass()); + // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. if (OptLevel != CodeGenOpt::None) @@ -356,13 +375,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createDeadMachineInstructionElimPass()); printAndVerify(PM, "After codegen DCE pass"); - PM.add(createPeepholeOptimizerPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); } // Pre-ra tail duplication. 
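The IfConversion.cpp hunks earlier in this diff thread a static branch prediction and a confidence value into every profitability check, alongside the new ExtraCost/ExtraCost2 per-block fields (extra execution cycles and extra predication cost). Below is a minimal sketch of the kind of expected-cycle comparison a target hook can make from those inputs; the function name, the misprediction penalty, and the exact formula are illustrative assumptions, not the patch's MeetIfcvtSizeLimit or any target's isProfitableToIfCvt implementation.

// Illustrative expected-cost comparison using the new inputs.  TCycles and
// FCycles stand for NonPredSize + ExtraCost of each side, TExtra and FExtra
// for the accumulated ExtraCost2 (per-instruction predication overhead).
static bool worthIfConverting(unsigned TCycles, unsigned TExtra,
                              unsigned FCycles, unsigned FExtra,
                              float Prediction, float Confidence) {
  const unsigned MispredictPenalty = 10; // hypothetical, target-dependent

  // Keeping the branch: each side's cost weighted by how often it runs,
  // plus a misprediction cost weighted by how unsure the predictor is.
  float BranchCost = Prediction * TCycles + (1.0f - Prediction) * FCycles +
                     (1.0f - Confidence) * MispredictPenalty;

  // After if-conversion both sides always execute, and every predicated
  // instruction pays its extra predication cost.
  float PredicatedCost = float(TCycles + TExtra + FCycles + FExtra);

  return PredicatedCost <= BranchCost;
}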
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index b9527fafbee8..0eb009ddac29 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { @@ -35,14 +36,14 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { unsigned RHSLatency = PQ->getLatency(RHSNum); if (LHSLatency < RHSLatency) return true; if (LHSLatency > RHSLatency) return false; - + // After that, if two nodes have identical latencies, look to see if one will // unblock more other nodes than the other. unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); if (LHSBlocked < RHSBlocked) return true; if (LHSBlocked > RHSBlocked) return false; - + // Finally, just to provide a stable ordering, use the node number as a // deciding factor. return LHSNum < RHSNum; @@ -64,7 +65,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { OnlyAvailablePred = &Pred; } } - + return OnlyAvailablePred; } @@ -78,7 +79,7 @@ void LatencyPriorityQueue::push(SUnit *SU) { ++NumNodesBlocking; } NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; - + Queue.push_back(SU); } @@ -102,10 +103,10 @@ void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { /// node of the same priority that will not make a node available. void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. - + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; - + // Okay, we found a single predecessor that is available, but not scheduled. // Since it is available, it must be in the priority queue. First remove it. remove(OnlyAvailablePred); @@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } + +#ifdef NDEBUG +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { + LatencyPriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp new file mode 100644 index 000000000000..853ec1ac7c13 --- /dev/null +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -0,0 +1,711 @@ +//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveDebugVariables analysis. +// +// Remove all DBG_VALUE instructions referencing virtual registers and replace +// them with a data structure tracking where live user variables are kept - in a +// virtual register or in a stack slot. +// +// Allow the data structure to be updated during register allocation when values +// are moved between registers and stack slots. Finally emit new DBG_VALUE +// instructions after register allocation is complete. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "livedebug" +#include "LiveDebugVariables.h" +#include "VirtRegMap.h" +#include "llvm/Constants.h" +#include "llvm/Metadata.h" +#include "llvm/Value.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableLDV("live-debug-variables", cl::init(true), + cl::desc("Enable the live debug variables pass"), cl::Hidden); + +char LiveDebugVariables::ID = 0; + +INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) + +void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addRequiredTransitive<LiveIntervals>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); +} + +/// LocMap - Map of where a user value is live, and its location. +typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; + +/// UserValue - A user value is a part of a debug info user variable. +/// +/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register +/// holds part of a user variable. The part is identified by a byte offset. +/// +/// UserValues are grouped into equivalence classes for easier searching. Two +/// user values are related if they refer to the same variable, or if they are +/// held by the same virtual register. The equivalence class is the transitive +/// closure of that relation. +namespace { +class UserValue { + const MDNode *variable; ///< The debug info variable we are part of. + unsigned offset; ///< Byte offset into variable. + DebugLoc dl; ///< The debug location for the variable. This is + ///< used by dwarf writer to find lexical scope. + UserValue *leader; ///< Equivalence class leader. + UserValue *next; ///< Next value in equivalence class, or null. + + /// Numbered locations referenced by locmap. + SmallVector<MachineOperand, 4> locations; + + /// Map of slot indices where this value is live. + LocMap locInts; + + /// coalesceLocation - After LocNo was changed, check if it has become + /// identical to another location, and coalesce them. This may cause LocNo or + /// a later location to be erased, but no earlier location will be erased. + void coalesceLocation(unsigned LocNo); + + /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. + void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, + LiveIntervals &LIS, const TargetInstrInfo &TII); + + /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx. + void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS, const TargetInstrInfo &TII); + +public: + /// UserValue - Create a new UserValue. 
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg() && LocMO.getReg() == 0)
+ return ~0u;
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ return locations.size() - 1;
+ }
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+ LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
+ /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction. A
+ /// variable may have more than one corresponding DBG_VALUE instruction.
+ /// Only the first one needs a DebugLoc to identify the variable's lexical
+ /// scope in the source file.
+ DebugLoc findDebugLoc();
+ void print(raw_ostream&, const TargetRegisterInfo*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+ /// @param Idx Last valid SlotIndex before instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+ /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+ OS << "!\"" << MDS->getString() << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ OS << " Loc" << i << '=' << locations[i];
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo; + if (KeepLoc > EraseLoc) + std::swap(KeepLoc, EraseLoc); + locations.erase(locations.begin() + EraseLoc); + + // Rewrite values. + for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + unsigned v = I.value(); + if (v == EraseLoc) + I.setValue(KeepLoc); // Coalesce when possible. + else if (v > EraseLoc) + I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values. + } +} + +UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, + DebugLoc DL) { + UserValue *&Leader = userVarMap[Var]; + if (Leader) { + UserValue *UV = Leader->getLeader(); + Leader = UV; + for (; UV; UV = UV->getNext()) + if (UV->match(Var, Offset)) + return UV; + } + + UserValue *UV = new UserValue(Var, Offset, DL, allocator); + userValues.push_back(UV); + Leader = UserValue::merge(Leader, UV); + return UV; +} + +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs"); + UserValue *&Leader = virtRegToEqClass[VirtReg]; + Leader = UserValue::merge(Leader, EC); +} + +UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { + if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) + return UV->getLeader(); + return 0; +} + +bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { + // DBG_VALUE loc, offset, variable + if (MI->getNumOperands() != 3 || + !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + DEBUG(dbgs() << "Can't handle " << *MI); + return false; + } + + // Get or create the UserValue for (variable,offset). + unsigned Offset = MI->getOperand(1).getImm(); + const MDNode *Var = MI->getOperand(2).getMetadata(); + UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + + // If the location is a virtual register, make sure it is mapped. + if (MI->getOperand(0).isReg()) { + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + mapVirtReg(Reg, UV); + } + + UV->addDef(Idx, MI->getOperand(0)); + return true; +} + +bool LDVImpl::collectDebugValues(MachineFunction &mf) { + bool Changed = false; + for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; + ++MFI) { + MachineBasicBlock *MBB = MFI; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE;) { + if (!MBBI->isDebugValue()) { + ++MBBI; + continue; + } + // DBG_VALUE has no slot index, use the previous instruction instead. + SlotIndex Idx = MBBI == MBB->begin() ? + LIS->getMBBStartIdx(MBB) : + LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + // Handle consecutive DBG_VALUE instructions with the same slot index. + do { + if (handleDebugValue(MBBI, Idx)) { + MBBI = MBB->erase(MBBI); + Changed = true; + } else + ++MBBI; + } while (MBBI != MBBE && MBBI->isDebugValue()); + } + } + return Changed; +} + +void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, + LiveInterval *LI, const VNInfo *VNI, + LiveIntervals &LIS, MachineDominatorTree &MDT) { + SmallVector<SlotIndex, 16> Todo; + Todo.push_back(Idx); + + do { + SlotIndex Start = Todo.pop_back_val(); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); + SlotIndex Stop = LIS.getMBBEndIdx(MBB); + LocMap::iterator I = locInts.find(Start); + + // Limit to VNI's live range. + bool ToEnd = true; + if (LI && VNI) { + LiveRange *Range = LI->getLiveRangeContaining(Start); + if (!Range || Range->valno != VNI) + continue; + if (Range->end < Stop) + Stop = Range->end, ToEnd = false; + } + + // There could already be a short def at Start. 
+ if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + continue; + // This is a one-slot placeholder. Just skip it. + ++I; + } + + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + + if (Start >= Stop) + continue; + + I.insert(Start, Stop, LocNo); + + // If we extended to the MBB end, propagate down the dominator tree. + if (!ToEnd) + continue; + const std::vector<MachineDomTreeNode*> &Children = + MDT.getNode(MBB)->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) + Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock())); + } while (!Todo.empty()); +} + +void +UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { + SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; + + // Collect all defs to be extended (Skipping undefs). + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) + if (I.value() != ~0u) + Defs.push_back(std::make_pair(I.start(), I.value())); + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + SlotIndex Idx = Defs[i].first; + unsigned LocNo = Defs[i].second; + const MachineOperand &Loc = locations[LocNo]; + + // Register locations are constrained to where the register value is live. + if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { + LiveInterval *LI = &LIS.getInterval(Loc.getReg()); + const VNInfo *VNI = LI->getVNInfoAt(Idx); + extendDef(Idx, LocNo, LI, VNI, LIS, MDT); + } else + extendDef(Idx, LocNo, 0, 0, LIS, MDT); + } + + // Finally, erase all the undefs. + for (LocMap::iterator I = locInts.begin(); I.valid();) + if (I.value() == ~0u) + I.erase(); + else + ++I; +} + +void LDVImpl::computeIntervals() { + for (unsigned i = 0, e = userValues.size(); i != e; ++i) + userValues[i]->computeIntervals(*LIS, *MDT); +} + +bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + LIS = &pass.getAnalysis<LiveIntervals>(); + MDT = &pass.getAnalysis<MachineDominatorTree>(); + TRI = mf.getTarget().getRegisterInfo(); + clear(); + DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " + << ((Value*)mf.getFunction())->getName() + << " **********\n"); + + bool Changed = collectDebugValues(mf); + computeIntervals(); + DEBUG(print(dbgs())); + return Changed; +} + +bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { + if (!EnableLDV) + return false; + if (!pImpl) + pImpl = new LDVImpl(this); + return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); +} + +void LiveDebugVariables::releaseMemory() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->clear(); +} + +LiveDebugVariables::~LiveDebugVariables() { + if (pImpl) + delete static_cast<LDVImpl*>(pImpl); +} + +void UserValue:: +renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, + const TargetRegisterInfo *TRI) { + for (unsigned i = locations.size(); i; --i) { + unsigned LocNo = i - 1; + MachineOperand &Loc = locations[LocNo]; + if (!Loc.isReg() || Loc.getReg() != OldReg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + Loc.substPhysReg(NewReg, *TRI); + else + Loc.substVirtReg(NewReg, SubIdx, *TRI); + coalesceLocation(LocNo); + } +} + +void LDVImpl:: +renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { + UserValue *UV = lookupVirtReg(OldReg); + if (!UV) + return; + + if (TargetRegisterInfo::isVirtualRegister(NewReg)) + mapVirtReg(NewReg, UV); + virtRegToEqClass.erase(OldReg); + + do { + 
UV->renameRegister(OldReg, NewReg, SubIdx, TRI); + UV = UV->getNext(); + } while (UV); +} + +void LiveDebugVariables:: +renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx); +} + +void +UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { + // Iterate over locations in reverse makes it easier to handle coalescing. + for (unsigned i = locations.size(); i ; --i) { + unsigned LocNo = i-1; + MachineOperand &Loc = locations[LocNo]; + // Only virtual registers are rewritten. + if (!Loc.isReg() || !Loc.getReg() || + !TargetRegisterInfo::isVirtualRegister(Loc.getReg())) + continue; + unsigned VirtReg = Loc.getReg(); + if (VRM.isAssignedReg(VirtReg) && + TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && + VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + // FIXME: Translate SubIdx to a stackslot offset. + Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); + } else { + Loc.setReg(0); + Loc.setSubReg(0); + } + coalesceLocation(LocNo); + } + DEBUG(print(dbgs(), &TRI)); +} + +/// findInsertLocation - Find an iterator for inserting a DBG_VALUE +/// instruction. +static MachineBasicBlock::iterator +findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS) { + SlotIndex Start = LIS.getMBBStartIdx(MBB); + Idx = Idx.getBaseIndex(); + + // Try to find an insert location by going backwards from Idx. + MachineInstr *MI; + while (!(MI = LIS.getInstructionFromIndex(Idx))) { + // We've reached the beginning of MBB. + if (Idx == Start) { + MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin()); + return I; + } + Idx = Idx.getPrevIndex(); + } + + // Don't insert anything after the first terminator, though. + return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); +} + +DebugLoc UserValue::findDebugLoc() { + DebugLoc D = dl; + dl = DebugLoc(); + return D; +} +void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, + unsigned LocNo, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + MachineOperand &Loc = locations[LocNo]; + + // Frame index locations may require a target callback. + if (Loc.isFI()) { + MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), + Loc.getIndex(), offset, variable, + findDebugLoc()); + if (MI) { + MBB->insert(I, MI); + return; + } + } + // This is not a frame index, or the target is happy with a standard FI. 
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); +} + +void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS, const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0) + .addImm(offset).addMetadata(variable); +} + +void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); + + for (LocMap::const_iterator I = locInts.begin(); I.valid();) { + SlotIndex Start = I.start(); + SlotIndex Stop = I.stop(); + unsigned LocNo = I.value(); + DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(MBB, Start, LocNo, LIS, TII); + + // This interval may span multiple basic blocks. + // Insert a DBG_VALUE into each one. + while(Stop > MBBEnd) { + // Move to the next block. + Start = MBBEnd; + if (++MBB == MFEnd) + break; + MBBEnd = LIS.getMBBEndIdx(MBB); + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(MBB, Start, LocNo, LIS, TII); + } + DEBUG(dbgs() << '\n'); + if (MBB == MFEnd) + break; + + ++I; + if (Stop == MBBEnd) + continue; + // The current interval ends before MBB. + // Insert a kill if there is a gap. + if (!I.valid() || I.start() > Stop) + insertDebugKill(MBB, Stop, LIS, TII); + } +} + +void LDVImpl::emitDebugValues(VirtRegMap *VRM) { + DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + userValues[i]->rewriteLocations(*VRM, *TRI); + userValues[i]->emitDebugValues(VRM, *LIS, *TII); + } +} + +void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); +} + + +#ifndef NDEBUG +void LiveDebugVariables::dump() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->print(dbgs()); +} +#endif + diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h new file mode 100644 index 000000000000..a6e40a198456 --- /dev/null +++ b/lib/CodeGen/LiveDebugVariables.h @@ -0,0 +1,63 @@ +//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface to the LiveDebugVariables analysis. +// +// The analysis removes DBG_VALUE instructions for virtual registers and tracks +// live user variables in a data structure that can be updated during register +// allocation. +// +// After register allocation new DBG_VALUE instructions are emitted to reflect +// the new locations of user variables. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H +#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H + +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class VirtRegMap; + +class LiveDebugVariables : public MachineFunctionPass { + void *pImpl; +public: + static char ID; // Pass identification, replacement for typeid + + LiveDebugVariables(); + ~LiveDebugVariables(); + + /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. + /// @param OldReg Old virtual register that is going away. + /// @param NewReg New register holding the user variables. + /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub- + /// register. + void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + + /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes + /// that happened during register allocation. + /// @param VRM Rename virtual registers according to map. + void emitDebugValues(VirtRegMap *VRM); + + /// dump - Print data structures to dbgs(). + void dump(); + +private: + + virtual bool runOnMachineFunction(MachineFunction &); + virtual void releaseMemory(); + virtual void getAnalysisUsage(AnalysisUsage &) const; + +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 59f380ad2641..c2dbd6ab75a1 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -30,58 +30,19 @@ #include <algorithm> using namespace llvm; -// An example for liveAt(): -// -// this = [1,4), liveAt(0) will return false. The instruction defining this -// spans slots [0,3]. The interval belongs to an spilled definition of the -// variable it represents. This is because slot 1 is used (def slot) and spans -// up to slot 3 (store slot). -// -bool LiveInterval::liveAt(SlotIndex I) const { - Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); - - if (r == ranges.begin()) - return false; - - --r; - return r->contains(I); -} - -// liveBeforeAndAt - Check if the interval is live at the index and the index -// just before it. If index is liveAt, check if it starts a new live range. -// If it does, then check if the previous live range ends at index-1. -bool LiveInterval::liveBeforeAndAt(SlotIndex I) const { - Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); - - if (r == ranges.begin()) - return false; - - --r; - if (!r->contains(I)) - return false; - if (I != r->start) - return true; - // I is the start of a live range. Check if the previous live range ends - // at I-1. - if (r == ranges.begin()) - return false; - return r->end == I; +// CompEnd - Compare LiveRange ends. +namespace { +struct CompEnd { + bool operator()(const LiveRange &A, const LiveRange &B) const { + return A.end < B.end; + } +}; } -/// killedAt - Return true if a live range ends at index. Note that the kill -/// point is not contained in the half-open live range. It is usually the -/// getDefIndex() slot following its last use. -bool LiveInterval::killedAt(SlotIndex I) const { - Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I); - - // Now r points to the first interval with start >= I, or ranges.end(). - if (r == ranges.begin()) - return false; - - --r; - // Now r points to the last interval with end <= I. - // r->end is the kill point. 
- return r->end == I; +LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { + assert(Pos.isValid() && "Cannot search for an invalid index"); + return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0), + CompEnd()); } /// killedInRange - Return true if the interval has kills in [Start,End). @@ -330,25 +291,14 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in -/// a single LiveRange of the live interval. -bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) { - Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); - if (I == ranges.begin()) - return false; - --I; - return I->containsRange(Start, End); -} - /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, bool RemoveDeadValNo) { // Find the LiveRange containing this span. - Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); - assert(I != ranges.begin() && "Range is not in interval!"); - --I; + Ranges::iterator I = find(Start); + assert(I != ranges.end() && "Range is not in interval!"); assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); // If the span we are removing is at the start of the LiveRange, adjust it. @@ -405,32 +355,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { markValNoForDeletion(ValNo); } -/// getLiveRangeContaining - Return the live range that contains the -/// specified index, or null if there is none. -LiveInterval::const_iterator -LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const { - const_iterator It = std::upper_bound(begin(), end(), Idx); - if (It != ranges.begin()) { - --It; - if (It->contains(Idx)) - return It; - } - - return end(); -} - -LiveInterval::iterator -LiveInterval::FindLiveRangeContaining(SlotIndex Idx) { - iterator It = std::upper_bound(begin(), end(), Idx); - if (It != begin()) { - --It; - if (It->contains(Idx)) - return It; - } - - return end(); -} - /// findDefinedVNInfo - Find the VNInfo defined by the specified /// index (register interval). VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const { @@ -443,17 +367,6 @@ VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const { return 0; } -/// findDefinedVNInfo - Find the VNInfo defined by the specified -/// register (stack inteval). -VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { - for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); - i != e; ++i) { - if ((*i)->getReg() == reg) - return *i; - } - return 0; -} - /// join - Join two live intervals (this, and other) together. This applies /// mappings to the value numbers in the LHS/RHS intervals as specified. If /// the intervals are not joinable, this aborts. @@ -616,103 +529,6 @@ void LiveInterval::MergeValueInAsValue( } -/// MergeInClobberRanges - For any live ranges that are not defined in the -/// current interval, but are defined in the Clobbers interval, mark them -/// used with an unknown definition value. 
-void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, - const LiveInterval &Clobbers, - VNInfo::Allocator &VNInfoAllocator) { - if (Clobbers.empty()) return; - - DenseMap<VNInfo*, VNInfo*> ValNoMaps; - VNInfo *UnusedValNo = 0; - iterator IP = begin(); - for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) { - // For every val# in the Clobbers interval, create a new "unknown" val#. - VNInfo *ClobberValNo = 0; - DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno); - if (VI != ValNoMaps.end()) - ClobberValNo = VI->second; - else if (UnusedValNo) - ClobberValNo = UnusedValNo; - else { - UnusedValNo = ClobberValNo = - getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); - ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); - } - - bool Done = false; - SlotIndex Start = I->start, End = I->end; - // If a clobber range starts before an existing range and ends after - // it, the clobber range will need to be split into multiple ranges. - // Loop until the entire clobber range is handled. - while (!Done) { - Done = true; - IP = std::upper_bound(IP, end(), Start); - SlotIndex SubRangeStart = Start; - SlotIndex SubRangeEnd = End; - - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > SubRangeStart) { - SubRangeStart = IP[-1].end; - // Trimmed away the whole range? - if (SubRangeStart >= SubRangeEnd) continue; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && SubRangeEnd > IP->start) { - // If the clobber live range extends beyond the existing live range, - // it'll need at least another live range, so set the flag to keep - // iterating. - if (SubRangeEnd > IP->end) { - Start = IP->end; - Done = false; - } - SubRangeEnd = IP->start; - // If this trimmed away the whole range, ignore it. - if (SubRangeStart == SubRangeEnd) continue; - } - - // Insert the clobber interval. - IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo), - IP); - UnusedValNo = 0; - } - } - - if (UnusedValNo) { - // Delete the last unused val#. - valnos.pop_back(); - } -} - -void LiveInterval::MergeInClobberRange(LiveIntervals &li_, - SlotIndex Start, - SlotIndex End, - VNInfo::Allocator &VNInfoAllocator) { - // Find a value # to use for the clobber ranges. If there is already a value# - // for unknown values, use it. - VNInfo *ClobberValNo = - getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); - - iterator IP = begin(); - IP = std::upper_bound(IP, end(), Start); - - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > Start) { - Start = IP[-1].end; - // Trimmed away the whole range? - if (Start >= End) return; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && End > IP->start) { - End = IP->start; - // If this trimmed away the whole range, ignore it. - if (Start == End) return; - } - - // Insert the clobber interval. - addRangeFrom(LiveRange(Start, End, ClobberValNo), IP); -} /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all @@ -767,6 +583,9 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } + // Merge the relevant flags. + V2->mergeFlags(V1); + // Now that V1 is dead, remove it. 
markValNoForDeletion(V1); @@ -831,14 +650,9 @@ void LiveRange::dump() const { } void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { - if (isStackSlot()) - OS << "SS#" << getStackSlotIndex(); - else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) - OS << TRI->getName(reg); - else - OS << "%reg" << reg; - - OS << ',' << weight; + OS << PrintReg(reg, TRI); + if (weight != 0) + OS << ',' << weight; if (empty()) OS << " EMPTY"; @@ -863,10 +677,9 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (vni->isUnused()) { OS << "x"; } else { - if (!vni->isDefAccurate() && !vni->isPHIDef()) - OS << "?"; - else - OS << vni->def; + OS << vni->def; + if (vni->isPHIDef()) + OS << "-phidef"; if (vni->hasPHIKill()) OS << "-phikill"; if (vni->hasRedefByEC()) @@ -884,3 +697,84 @@ void LiveInterval::dump() const { void LiveRange::print(raw_ostream &os) const { os << *this; } + +unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { + // Create initial equivalence classes. + eqClass_.clear(); + eqClass_.grow(LI->getNumValNums()); + + const VNInfo *used = 0, *unused = 0; + + // Determine connections. + for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + // Group all unused values into one class. + if (VNI->isUnused()) { + if (unused) + eqClass_.join(unused->id, VNI->id); + unused = VNI; + continue; + } + used = VNI; + if (VNI->isPHIDef()) { + const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def); + assert(MBB && "Phi-def has no defining MBB"); + // Connect to values live out of predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) + if (const VNInfo *PVNI = + LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot())) + eqClass_.join(VNI->id, PVNI->id); + } else { + // Normal value defined by an instruction. Check for two-addr redef. + // FIXME: This could be coincidental. Should we really check for a tied + // operand constraint? + // Note that VNI->def may be a use slot for an early clobber def. + if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + eqClass_.join(VNI->id, UVNI->id); + } + } + + // Lump all the unused values in with the last used value. + if (used && unused) + eqClass_.join(used->id, unused->id); + + eqClass_.compress(); + return eqClass_.getNumClasses(); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { + assert(LIV[0] && "LIV[0] must be set"); + LiveInterval &LI = *LIV[0]; + + // First move runs to new intervals. + LiveInterval::iterator J = LI.begin(), E = LI.end(); + while (J != E && eqClass_[J->valno->id] == 0) + ++J; + for (LiveInterval::iterator I = J; I != E; ++I) { + if (unsigned eq = eqClass_[I->valno->id]) { + assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && + "New intervals should be empty"); + LIV[eq]->ranges.push_back(*I); + } else + *J++ = *I; + } + LI.ranges.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. 
+ unsigned j = 0, e = LI.getNumValNums(); + while (j != e && eqClass_[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LI.getValNumInfo(i); + if (unsigned eq = eqClass_[i]) { + VNI->id = LIV[eq]->getNumValNums(); + LIV[eq]->valnos.push_back(VNI); + } else { + VNI->id = j; + LI.valnos[j++] = VNI; + } + } + LI.valnos.resize(j); +} diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 2726fc337539..aef5b5f77e78 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -20,6 +20,7 @@ #include "VirtRegMap.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,8 +56,17 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; -INITIALIZE_PASS(LiveIntervals, "liveintervals", - "Live Interval Analysis", false, false); +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -132,19 +142,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { void LiveIntervals::printInstrs(raw_ostream &OS) const { OS << "********** MACHINEINSTRS **********\n"; - - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - OS << "BB#" << mbbi->getNumber() - << ":\t\t# derived from " << mbbi->getName() << "\n"; - for (MachineBasicBlock::iterator mii = mbbi->begin(), - mie = mbbi->end(); mii != mie; ++mii) { - if (mii->isDebugValue()) - OS << " \t" << *mii; - else - OS << getInstructionIndex(mii) << '\t' << *mii; - } - } + mf_->print(OS, indexes_); } void LiveIntervals::dumpInstrs() const { @@ -248,15 +246,6 @@ bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, return false; } -#ifndef NDEBUG -static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { - if (TargetRegisterInfo::isPhysicalRegister(reg)) - dbgs() << tri_->getName(reg); - else - dbgs() << "%reg" << reg; -} -#endif - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -285,8 +274,8 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, SlotIndex RedefIndex = MIIdx.getDefIndex(); const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex.getUseIndex()); - if (OldLR->valno->isDefAccurate()) { - MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); + MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); + if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; } return false; @@ -298,10 +287,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineOperand& MO, unsigned MOIdx, LiveInterval &interval) { - DEBUG({ - dbgs() << "\t\tregister: "; - printRegName(interval.reg, tri_); - }); + 
DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); // Virtual registers may be defined multiple times (due to phi // elimination and 2-addr elimination). Much of what we do only has to be @@ -326,8 +312,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, CopyMI = mi; } - VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true, - VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); // Loop over all of the blocks that the vreg is defined in. There are @@ -393,8 +378,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) if (PHIJoin) { - ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false, - VNInfoAllocator); + assert(getInstructionFromIndex(Start) == 0 && + "PHI def index points at actual instruction."); + ValNo = interval.getNextValue(Start, 0, VNInfoAllocator); ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); @@ -440,10 +426,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. - VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), - false, // update at * - VNInfoAllocator); - ValNo->setFlags(OldValNo->getFlags()); // * <- updating here + VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; @@ -481,7 +464,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; if (mi->isCopyLike()) CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); + ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); @@ -504,10 +487,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineInstr *CopyMI) { // A physical register cannot be live across basic block, so its // lifetime must end somewhere in its defining basic block. - DEBUG({ - dbgs() << "\t\tregister: "; - printRegName(interval.reg, tri_); - }); + DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; SlotIndex start = baseIndex.getDefIndex(); @@ -573,11 +553,11 @@ exit: assert(start < end && "did not find end of interval?"); // Already exists? Extend old live interval. - LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start); - bool Extend = OldLR != interval.end(); - VNInfo *ValNo = Extend - ? 
OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator); - if (MO.isEarlyClobber() && Extend) + VNInfo *ValNo = interval.getVNInfoAt(start); + bool Extend = ValNo != 0; + if (!Extend) + ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator); + if (Extend && MO.isEarlyClobber()) ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); @@ -611,10 +591,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex MIIdx, LiveInterval &interval, bool isAlias) { - DEBUG({ - dbgs() << "\t\tlivein register: "; - printRegName(interval.reg, tri_); - }); + DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. @@ -672,9 +649,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, } } + SlotIndex defIdx = getMBBStartIdx(MBB); + assert(getInstructionFromIndex(defIdx) == 0 && + "PHI def index points at actual instruction."); VNInfo *vni = - interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true), - 0, false, VNInfoAllocator); + interval.getNextValue(defIdx, 0, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); @@ -764,10 +743,177 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { return NewLI; } +/// shrinkToUses - After removing some uses of a register, shrink its live +/// range to just the remaining uses. This method does not compute reaching +/// defs for new uses, and it doesn't remove dead defs. +void LiveIntervals::shrinkToUses(LiveInterval *li) { + DEBUG(dbgs() << "Shrink: " << *li << '\n'); + assert(TargetRegisterInfo::isVirtualRegister(li->reg) + && "Can't only shrink physical registers"); + // Find all the values used, including PHI kills. + SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; + + // Visit all instructions reading li->reg. + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg); + MachineInstr *UseMI = I.skipInstruction();) { + if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) + continue; + SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); + VNInfo *VNI = li->getVNInfoAt(Idx); + assert(VNI && "Live interval not live into reading instruction"); + if (VNI->def == Idx) { + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. + Idx = Idx.getPrevSlot(); + VNI = li->getVNInfoAt(Idx); + assert(VNI && "Early-clobber tied value not available"); + } + WorkList.push_back(std::make_pair(Idx, VNI)); + } + + // Create a new live interval with only minimal live segments per def. + LiveInterval NewLI(li->reg, 0); + for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); + } + + // Extend intervals to reach all uses in WorkList. + while (!WorkList.empty()) { + SlotIndex Idx = WorkList.back().first; + VNInfo *VNI = WorkList.back().second; + WorkList.pop_back(); + + // Extend the live range for VNI to be live at Idx. + LiveInterval::iterator I = NewLI.find(Idx); + + // Already got it? + if (I != NewLI.end() && I->start <= Idx) { + assert(I->valno == VNI && "Unexpected existing value number"); + continue; + } + + // Is there already a live range in the block containing Idx? 
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + SlotIndex BlockStart = getMBBStartIdx(MBB); + DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx + << " in BB#" << MBB->getNumber() << '@' << BlockStart); + if (I != NewLI.begin() && (--I)->end > BlockStart) { + assert(I->valno == VNI && "Wrong reaching def"); + DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n"); + // Is this the first use of a PHIDef in its defining block? + if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) { + // The PHI is live, make sure the predecessors are live-out. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + VNInfo *PVNI = li->getVNInfoAt(Stop); + // A predecessor is not required to have a live-out value for a PHI. + if (PVNI) { + assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag"); + WorkList.push_back(std::make_pair(Stop, PVNI)); + } + } + } + + // Extend the live range in the block to include Idx. + NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI)); + continue; + } + + // VNI is live-in to MBB. + DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); + NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + + // Make sure VNI is live-out from the predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + WorkList.push_back(std::make_pair(Stop, VNI)); + } + } + + // Handle dead values. + for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); + assert(LII != NewLI.end() && "Missing live range for PHI"); + if (LII->end != VNI->def.getNextSlot()) + continue; + if (!VNI->isPHIDef()) { + // This is a dead PHI. Remove it. + VNI->setIsUnused(true); + NewLI.removeRange(*LII); + } else { + // This is a dead def. Make sure the instruction knows. + MachineInstr *MI = getInstructionFromIndex(VNI->def); + assert(MI && "No instruction defining live value"); + MI->addRegisterDead(li->reg, tri_); + } + } + + // Move the trimmed ranges back. + li->ranges.swap(NewLI.ranges); + DEBUG(dbgs() << "Shrink: " << *li << '\n'); +} + + //===----------------------------------------------------------------------===// // Register allocator hooks. // +MachineBasicBlock::iterator +LiveIntervals::getLastSplitPoint(const LiveInterval &li, + MachineBasicBlock *mbb) const { + const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor(); + + // If li is not live into a landing pad, we can insert spill code before the + // first terminator. + if (!lpad || !isLiveInToMBB(li, lpad)) + return mbb->getFirstTerminator(); + + // When there is a landing pad, spill code must go before the call instruction + // that can throw. + MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin(); + while (I != B) { + --I; + if (I->getDesc().isCall()) + return I; + } + // The block contains no calls that can throw, so use the first terminator. 
+ return mbb->getFirstTerminator(); +} + +void LiveIntervals::addKillFlags() { + for (iterator I = begin(), E = end(); I != E; ++I) { + unsigned Reg = I->first; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (mri_->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = I->second; + + // Every instruction that kills Reg corresponds to a live range end point. + for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; + ++RI) { + // A LOAD index indicates an MBB edge. + if (RI->end.isLoad()) + continue; + MachineInstr *MI = getInstructionFromIndex(RI->end); + if (!MI) + continue; + MI->addRegisterKilled(Reg, NULL); + } + } +} + /// getReMatImplicitUse - If the remat definition MI has one (for now, we only /// allow one) virtual register operand, then its uses are implicitly using /// the register. Returns the virtual register. @@ -800,18 +946,17 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// which reaches the given instruction also reaches the specified use index. bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, SlotIndex UseIdx) const { - SlotIndex Index = getInstructionIndex(MI); - VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; - LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); - return UI != li.end() && UI->valno == ValNo; + VNInfo *UValNo = li.getVNInfoAt(UseIdx); + return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI)); } /// isReMaterializable - Returns true if the definition MI of the specified /// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI, - SmallVectorImpl<LiveInterval*> &SpillIs, - bool &isLoad) { +bool +LiveIntervals::isReMaterializable(const LiveInterval &li, + const VNInfo *ValNo, MachineInstr *MI, + const SmallVectorImpl<LiveInterval*> &SpillIs, + bool &isLoad) { if (DisableReMat) return false; @@ -829,7 +974,7 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, ri != re; ++ri) { MachineInstr *UseMI = &*ri; SlotIndex UseIdx = getInstructionIndex(UseMI); - if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo) + if (li.getVNInfoAt(UseIdx) != ValNo) continue; if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) return false; @@ -855,9 +1000,10 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - SmallVectorImpl<LiveInterval*> &SpillIs, - bool &isLoad) { +bool +LiveIntervals::isReMaterializable(const LiveInterval &li, + const SmallVectorImpl<LiveInterval*> &SpillIs, + bool &isLoad) { isLoad = false; for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); i != e; ++i) { @@ -865,9 +1011,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? 
- if (!VNI->isDefAccurate()) - return false; MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); + if (!ReMatDefMI) + return false; bool DefIsLoad = false; if (!ReMatDefMI || !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad)) @@ -1010,7 +1156,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (!vrm.isReMaterialized(Reg)) continue; @@ -1044,7 +1190,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (!mop.isReg()) continue; unsigned Reg = mop.getReg(); - if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (Reg != li.reg) continue; @@ -1140,11 +1286,14 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, rewriteImplicitOps(li, MI, NewVReg, vrm); // Reuse NewVReg for other reads. + bool HasEarlyClobber = false; for (unsigned j = 0, e = Ops.size(); j != e; ++j) { MachineOperand &mopj = MI->getOperand(Ops[j]); mopj.setReg(NewVReg); if (mopj.isImplicit()) rewriteImplicitOps(li, MI, NewVReg, vrm); + if (mopj.isEarlyClobber()) + HasEarlyClobber = true; } if (CreatedNewVReg) { @@ -1190,7 +1339,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (HasUse) { if (CreatedNewVReg) { LiveRange LR(index.getLoadIndex(), index.getDefIndex(), - nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); + nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } else { @@ -1203,8 +1352,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, } } if (HasDef) { - LiveRange LR(index.getDefIndex(), index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); + // An early clobber starts at the use slot, except for an early clobber + // tied to a use operand (yes, that is a thing). + LiveRange LR(HasEarlyClobber && !HasUse ? + index.getUseIndex() : index.getDefIndex(), + index.getStoreIndex(), + nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } @@ -1554,15 +1707,15 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { return (isDef + isUse) * lc; } -void -LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { +static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - normalizeSpillWeight(*NewLIs[i]); + NewLIs[i]->weight = + normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); } std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, - SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> &SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { assert(li.isSpillable() && "attempt to spill already spilled interval!"); @@ -1653,8 +1806,7 @@ addIntervalsForSpills(const LiveInterval &li, if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = VNI->isDefAccurate() - ? getInstructionFromIndex(VNI->def) : 0; + MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); bool dummy; if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { // Remember how to remat the def of this val#. 
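The hunk above shows the pattern this patch applies throughout LiveIntervalAnalysis.cpp: instead of trusting the old VNInfo "def accurate" flag, the code asks the slot index mapping for the defining instruction and treats a null result as "no real defining instruction" (a PHI def, for instance). A minimal self-contained sketch of the idiom, with a std::map standing in for the index-to-instruction table (names here are illustrative, not LLVM API):

  #include <cstdio>
  #include <map>
  #include <string>

  // Stand-in for the slot-index-to-instruction mapping (illustrative only).
  struct Instr { std::string text; };

  static const Instr *getInstructionFromIndex(const std::map<unsigned, Instr> &idx,
                                              unsigned def) {
    std::map<unsigned, Instr>::const_iterator it = idx.find(def);
    return it == idx.end() ? 0 : &it->second;   // null for PHI defs etc.
  }

  int main() {
    std::map<unsigned, Instr> index;
    index[12].text = "%vreg1 = ADD ...";

    // Old style: a flag on the value number said whether 'def' was trustworthy.
    // New style: just ask the index; a missing entry means "no defining instruction".
    const unsigned defs[] = {12, 4};
    for (unsigned i = 0; i != 2; ++i) {
      if (const Instr *MI = getInstructionFromIndex(index, defs[i]))
        std::printf("def at %u: %s\n", defs[i], MI->text.c_str());
      else
        std::printf("def at %u: no instruction (e.g. PHI def)\n", defs[i]);
    }
  }
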
@@ -1926,6 +2078,9 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, unsigned PhysReg, VirtRegMap &vrm) { unsigned SpillReg = getRepresentativeReg(PhysReg); + DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) + << " represented by " << tri_->getName(SpillReg) << '\n'); + for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) // If there are registers which alias PhysReg, but which are not a // sub-register of the chosen representative super register. Assert @@ -1937,15 +2092,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, SmallVector<unsigned, 4> PRegs; if (hasInterval(SpillReg)) PRegs.push_back(SpillReg); - else { - SmallSet<unsigned, 4> Added; - for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) - if (Added.insert(*AS) && hasInterval(*AS)) { - PRegs.push_back(*AS); - for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS) - Added.insert(*ASS); - } - } + for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) + if (hasInterval(*SR)) + PRegs.push_back(*SR); + + DEBUG({ + dbgs() << "Trying to spill:"; + for (unsigned i = 0, e = PRegs.size(); i != e; ++i) + dbgs() << ' ' << tri_->getName(PRegs[i]); + dbgs() << '\n'; + }); SmallPtrSet<MachineInstr*, 8> SeenMIs; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), @@ -1956,18 +2112,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, continue; SeenMIs.insert(MI); SlotIndex Index = getInstructionIndex(MI); + bool LiveReg = false; for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { unsigned PReg = PRegs[i]; LiveInterval &pli = getInterval(PReg); if (!pli.liveAt(Index)) continue; - vrm.addEmergencySpill(PReg, MI); + LiveReg = true; SlotIndex StartIdx = Index.getLoadIndex(); SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (pli.isInOneLiveRange(StartIdx, EndIdx)) { - pli.removeRange(StartIdx, EndIdx); - Cut = true; - } else { + if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; @@ -1978,15 +2132,14 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, } report_fatal_error(Msg.str()); } - for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) { - if (!hasInterval(*AS)) - continue; - LiveInterval &spli = getInterval(*AS); - if (spli.liveAt(Index)) - spli.removeRange(Index.getLoadIndex(), - Index.getNextIndex().getBaseIndex()); - } + pli.removeRange(StartIdx, EndIdx); + LiveReg = true; } + if (!LiveReg) + continue; + DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); + vrm.addEmergencySpill(SpillReg, MI); + Cut = true; } return Cut; } @@ -1996,7 +2149,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getDefIndex()), - startInst, true, getVNInfoAllocator()); + startInst, getVNInfoAllocator()); VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getDefIndex()), diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp new file mode 100644 index 000000000000..205f28a0d65a --- /dev/null +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -0,0 +1,315 @@ +//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University 
of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LiveIntervalUnion represents a coalesced set of live intervals. This may be +// used during coalescing to represent a congruence class, or during register +// allocation to model liveness of a physical register. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveIntervalUnion.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/MachineLoopRanges.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + + +// Merge a LiveInterval's segments. Guarantee no overlaps. +void LiveIntervalUnion::unify(LiveInterval &VirtReg) { + if (VirtReg.empty()) + return; + ++Tag; + + // Insert each of the virtual register's live segments into the map. + LiveInterval::iterator RegPos = VirtReg.begin(); + LiveInterval::iterator RegEnd = VirtReg.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + for (;;) { + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); + if (++RegPos == RegEnd) + return; + SegPos.advanceTo(RegPos->start); + } +} + +// Remove a live virtual register's segments from this union. +void LiveIntervalUnion::extract(LiveInterval &VirtReg) { + if (VirtReg.empty()) + return; + ++Tag; + + // Remove each of the virtual register's live segments from the map. + LiveInterval::iterator RegPos = VirtReg.begin(); + LiveInterval::iterator RegEnd = VirtReg.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + for (;;) { + assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval"); + SegPos.erase(); + if (!SegPos.valid()) + return; + + // Skip all segments that may have been coalesced. + RegPos = VirtReg.advanceTo(RegPos, SegPos.start()); + if (RegPos == RegEnd) + return; + + SegPos.advanceTo(RegPos->start); + } +} + +void +LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + OS << "LIU " << PrintReg(RepReg, TRI); + if (empty()) { + OS << " empty\n"; + return; + } + for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { + OS << " [" << SI.start() << ' ' << SI.stop() << "):" + << PrintReg(SI.value()->reg, TRI); + } + OS << '\n'; +} + +void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { + OS << '[' << start() << ';' << stop() << "):" + << PrintReg(interference()->reg, TRI); +} + +void LiveIntervalUnion::Query::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) { + OS << "Interferences with "; + LiveUnion->print(OS, TRI); + InterferenceResult IR = firstInterference(); + while (isInterference(IR)) { + OS << " "; + IR.print(OS, TRI); + OS << '\n'; + nextInterference(IR); + } +} + +#ifndef NDEBUG +// Verify the live intervals in this union and add them to the visited set. +void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { + for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI) + VisitedVRegs.set(SI.value()->reg); +} +#endif //!NDEBUG + +// Private interface accessed by Query. +// +// Find a pair of segments that intersect, one in the live virtual register +// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query) +// is responsible for advancing the LiveIntervalUnion segments to find a +// "notable" intersection, which requires query-specific logic. 
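The comment above is describing the usual walk over two sorted sequences of disjoint segments: advance whichever side ends first until a pair overlaps. A compact stand-alone sketch of that idea, with plain vectors standing in for the LiveInterval and the union's IntervalMap (all names are illustrative, not the patch's code):

  #include <cstdio>
  #include <utility>
  #include <vector>

  typedef std::pair<unsigned, unsigned> Seg;          // half-open [start, end)

  // Return indices of the first overlapping pair, or (A.size(), B.size()) if none.
  static std::pair<size_t, size_t> findIntersection(const std::vector<Seg> &A,
                                                    const std::vector<Seg> &B) {
    size_t i = 0, j = 0;
    while (i < A.size() && j < B.size()) {
      if (A[i].second <= B[j].first)
        ++i;                                          // A[i] ends before B[j] starts
      else if (B[j].second <= A[i].first)
        ++j;                                          // B[j] ends before A[i] starts
      else
        return std::make_pair(i, j);                  // the segments overlap
    }
    return std::make_pair(A.size(), B.size());
  }

  int main() {
    std::vector<Seg> vreg;
    vreg.push_back(Seg(0, 4));  vreg.push_back(Seg(10, 14));
    std::vector<Seg> live;
    live.push_back(Seg(4, 8));  live.push_back(Seg(12, 20));
    std::pair<size_t, size_t> p = findIntersection(vreg, live);
    if (p.first < vreg.size())
      std::printf("overlap: vreg[%zu] with union[%zu]\n", p.first, p.second);
  }

The patch's version performs the same walk with iterators that support advanceTo(), so each side can skip ahead rather than stepping one segment at a time.
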
+// +// This design assumes only a fast mechanism for intersecting a single live +// virtual register segment with a set of LiveIntervalUnion segments. This may +// be ok since most virtual registers have very few segments. If we had a data +// structure that optimizd MxN intersection of segments, then we would bypass +// the loop that advances within the LiveInterval. +// +// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first +// segment whose start point is greater than LiveInterval's end point. +// +// Assumes that segments are sorted by start position in both +// LiveInterval and LiveSegments. +void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const { + // Search until reaching the end of the LiveUnion segments. + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + if (IR.VirtRegI == VirtRegEnd) + return; + while (IR.LiveUnionI.valid()) { + // Slowly advance the live virtual reg iterator until we surpass the next + // segment in LiveUnion. + // + // Note: If this is ever used for coalescing of fixed registers and we have + // a live vreg with thousands of segments, then change this code to use + // upperBound instead. + IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start()); + if (IR.VirtRegI == VirtRegEnd) + break; // Retain current (nonoverlapping) LiveUnionI + + // VirtRegI may have advanced far beyond LiveUnionI, catch up. + IR.LiveUnionI.advanceTo(IR.VirtRegI->start); + + // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end + if (!IR.LiveUnionI.valid()) + break; + if (IR.LiveUnionI.start() < IR.VirtRegI->end) { + assert(overlap(*IR.VirtRegI, IR.LiveUnionI) && + "upperBound postcondition"); + break; + } + } + if (!IR.LiveUnionI.valid()) + IR.VirtRegI = VirtRegEnd; +} + +// Find the first intersection, and cache interference info +// (retain segment iterators into both VirtReg and LiveUnion). +const LiveIntervalUnion::InterferenceResult & +LiveIntervalUnion::Query::firstInterference() { + if (CheckedFirstInterference) + return FirstInterference; + CheckedFirstInterference = true; + InterferenceResult &IR = FirstInterference; + + // Quickly skip interference check for empty sets. + if (VirtReg->empty() || LiveUnion->empty()) { + IR.VirtRegI = VirtReg->end(); + } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) { + // VirtReg starts first, perform double binary search. + IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); + if (IR.VirtRegI != VirtReg->end()) + IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + } else { + // LiveUnion starts first, perform double binary search. + IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + if (IR.LiveUnionI.valid()) + IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); + else + IR.VirtRegI = VirtReg->end(); + } + findIntersection(FirstInterference); + assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid()) + && "Uninitialized iterator"); + return FirstInterference; +} + +// Treat the result as an iterator and advance to the next interfering pair +// of segments. This is a plain iterator with no filter. +bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const { + assert(isInterference(IR) && "iteration past end of interferences"); + + // Advance either the VirtReg or LiveUnion segment to ensure that we visit all + // unique overlapping pairs. 
+ if (IR.VirtRegI->end < IR.LiveUnionI.stop()) { + if (++IR.VirtRegI == VirtReg->end()) + return false; + } + else { + if (!(++IR.LiveUnionI).valid()) { + IR.VirtRegI = VirtReg->end(); + return false; + } + } + // Short-circuit findIntersection() if possible. + if (overlap(*IR.VirtRegI, IR.LiveUnionI)) + return true; + + // Find the next intersection. + findIntersection(IR); + return isInterference(IR); +} + +// Scan the vector of interfering virtual registers in this union. Assume it's +// quite small. +bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { + SmallVectorImpl<LiveInterval*>::const_iterator I = + std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg); + return I != InterferingVRegs.end(); +} + +// Count the number of virtual registers in this union that interfere with this +// query's live virtual register. +// +// The number of times that we either advance IR.VirtRegI or call +// LiveUnion.upperBound() will be no more than the number of holes in +// VirtReg. So each invocation of collectInterferingVRegs() takes +// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()). +// +// For comments on how to speed it up, see Query::findIntersection(). +unsigned LiveIntervalUnion::Query:: +collectInterferingVRegs(unsigned MaxInterferingRegs) { + InterferenceResult IR = firstInterference(); + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + LiveInterval *RecentInterferingVReg = NULL; + if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) { + // Advance the union's iterator to reach an unseen interfering vreg. + do { + if (IR.LiveUnionI.value() == RecentInterferingVReg) + continue; + + if (!isSeenInterference(IR.LiveUnionI.value())) + break; + + // Cache the most recent interfering vreg to bypass isSeenInterference. + RecentInterferingVReg = IR.LiveUnionI.value(); + + } while ((++IR.LiveUnionI).valid()); + if (!IR.LiveUnionI.valid()) + break; + + // Advance the VirtReg iterator until surpassing the next segment in + // LiveUnion. + IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start()); + if (IR.VirtRegI == VirtRegEnd) + break; + + // Check for intersection with the union's segment. + if (overlap(*IR.VirtRegI, IR.LiveUnionI)) { + + if (!IR.LiveUnionI.value()->isSpillable()) + SeenUnspillableVReg = true; + + if (InterferingVRegs.size() == MaxInterferingRegs) + // Leave SeenAllInterferences set to false to indicate that at least one + // interference exists beyond those we collected. + return MaxInterferingRegs; + + InterferingVRegs.push_back(IR.LiveUnionI.value()); + + // Cache the most recent interfering vreg to bypass isSeenInterference. + RecentInterferingVReg = IR.LiveUnionI.value(); + ++IR.LiveUnionI; + continue; + } + // VirtRegI may have advanced far beyond LiveUnionI, + // do a fast intersection test to "catch up" + IR.LiveUnionI.advanceTo(IR.VirtRegI->start); + } + SeenAllInterferences = true; + return InterferingVRegs.size(); +} + +bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) { + // VirtReg is likely live throughout the loop, so start by checking LIU-Loop + // overlaps. + IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map> + Overlaps(LiveUnion->getMap(), Loop->getMap()); + if (!Overlaps.valid()) + return false; + + // The loop is overlapping an LIU assignment. Check VirtReg as well. 
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start()); + + for (;;) { + if (VRI == VirtReg->end()) + return false; + if (VRI->start < Overlaps.stop()) + return true; + + Overlaps.advanceTo(VRI->start); + if (!Overlaps.valid()) + return false; + if (Overlaps.start() < VRI->end) + return true; + + VRI = VirtReg->advanceTo(VRI, Overlaps.start()); + } +} diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h new file mode 100644 index 000000000000..6f9c5f4455e9 --- /dev/null +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -0,0 +1,258 @@ +//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LiveIntervalUnion is a union of live segments across multiple live virtual +// registers. This may be used during coalescing to represent a congruence +// class, or during register allocation to model liveness of a physical +// register. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVEINTERVALUNION +#define LLVM_CODEGEN_LIVEINTERVALUNION + +#include "llvm/ADT/IntervalMap.h" +#include "llvm/CodeGen/LiveInterval.h" + +#include <algorithm> + +namespace llvm { + +class MachineLoopRange; +class TargetRegisterInfo; + +#ifndef NDEBUG +// forward declaration +template <unsigned Element> class SparseBitVector; +typedef SparseBitVector<128> LiveVirtRegBitSet; +#endif + +/// Compare a live virtual register segment to a LiveIntervalUnion segment. +inline bool +overlap(const LiveRange &VRSeg, + const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) { + return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end; +} + +/// Union of live intervals that are strong candidates for coalescing into a +/// single register (either physical or virtual depending on the context). We +/// expect the constituent live intervals to be disjoint, although we may +/// eventually make exceptions to handle value-based interference. +class LiveIntervalUnion { + // A set of live virtual register segments that supports fast insertion, + // intersection, and removal. + // Mapping SlotIndex intervals to virtual register numbers. + typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments; + +public: + // SegmentIter can advance to the next segment ordered by starting position + // which may belong to a different live virtual register. We also must be able + // to reach the current segment's containing virtual register. + typedef LiveSegments::iterator SegmentIter; + + // LiveIntervalUnions share an external allocator. + typedef LiveSegments::Allocator Allocator; + + class InterferenceResult; + class Query; + +private: + const unsigned RepReg; // representative register number + unsigned Tag; // unique tag for current contents. + LiveSegments Segments; // union of virtual reg segments + +public: + LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a) + {} + + // Iterate over all segments in the union of live virtual registers ordered + // by their starting position. 
+ SegmentIter begin() { return Segments.begin(); } + SegmentIter end() { return Segments.end(); } + SegmentIter find(SlotIndex x) { return Segments.find(x); } + bool empty() const { return Segments.empty(); } + SlotIndex startIndex() const { return Segments.start(); } + + // Provide public access to the underlying map to allow overlap iteration. + typedef LiveSegments Map; + const Map &getMap() { return Segments; } + + /// getTag - Return an opaque tag representing the current state of the union. + unsigned getTag() const { return Tag; } + + /// changedSince - Return true if the union change since getTag returned tag. + bool changedSince(unsigned tag) const { return tag != Tag; } + + // Add a live virtual register to this union and merge its segments. + void unify(LiveInterval &VirtReg); + + // Remove a live virtual register's segments from this union. + void extract(LiveInterval &VirtReg); + + // Print union, using TRI to translate register names + void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; + +#ifndef NDEBUG + // Verify the live intervals in this union and add them to the visited set. + void verify(LiveVirtRegBitSet& VisitedVRegs); +#endif + + /// Cache a single interference test result in the form of two intersecting + /// segments. This allows efficiently iterating over the interferences. The + /// iteration logic is handled by LiveIntervalUnion::Query which may + /// filter interferences depending on the type of query. + class InterferenceResult { + friend class Query; + + LiveInterval::iterator VirtRegI; // current position in VirtReg + SegmentIter LiveUnionI; // current position in LiveUnion + + // Internal ctor. + InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI) + : VirtRegI(VRegI), LiveUnionI(UnionI) {} + + public: + // Public default ctor. + InterferenceResult(): VirtRegI(), LiveUnionI() {} + + /// start - Return the start of the current overlap. + SlotIndex start() const { + return std::max(VirtRegI->start, LiveUnionI.start()); + } + + /// stop - Return the end of the current overlap. + SlotIndex stop() const { + return std::min(VirtRegI->end, LiveUnionI.stop()); + } + + /// interference - Return the register that is interfering here. + LiveInterval *interference() const { return LiveUnionI.value(); } + + // Note: this interface provides raw access to the iterators because the + // result has no way to tell if it's valid to dereference them. + + // Access the VirtReg segment. + LiveInterval::iterator virtRegPos() const { return VirtRegI; } + + // Access the LiveUnion segment. + const SegmentIter &liveUnionPos() const { return LiveUnionI; } + + bool operator==(const InterferenceResult &IR) const { + return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI; + } + bool operator!=(const InterferenceResult &IR) const { + return !operator==(IR); + } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; + }; + + /// Query interferences between a single live virtual register and a live + /// interval union. 
+ class Query { + LiveIntervalUnion *LiveUnion; + LiveInterval *VirtReg; + InterferenceResult FirstInterference; + SmallVector<LiveInterval*,4> InterferingVRegs; + bool CheckedFirstInterference; + bool SeenAllInterferences; + bool SeenUnspillableVReg; + unsigned Tag; + + public: + Query(): LiveUnion(), VirtReg() {} + + Query(LiveInterval *VReg, LiveIntervalUnion *LIU): + LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false), + SeenAllInterferences(false), SeenUnspillableVReg(false) + {} + + void clear() { + LiveUnion = NULL; + VirtReg = NULL; + InterferingVRegs.clear(); + CheckedFirstInterference = false; + SeenAllInterferences = false; + SeenUnspillableVReg = false; + Tag = 0; + } + + void init(LiveInterval *VReg, LiveIntervalUnion *LIU) { + assert(VReg && LIU && "Invalid arguments"); + if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) { + // Retain cached results, e.g. firstInterference. + return; + } + clear(); + LiveUnion = LIU; + VirtReg = VReg; + Tag = LIU->getTag(); + } + + LiveInterval &virtReg() const { + assert(VirtReg && "uninitialized"); + return *VirtReg; + } + + bool isInterference(const InterferenceResult &IR) const { + if (IR.VirtRegI != VirtReg->end()) { + assert(overlap(*IR.VirtRegI, IR.LiveUnionI) && + "invalid segment iterators"); + return true; + } + return false; + } + + // Does this live virtual register interfere with the union? + bool checkInterference() { return isInterference(firstInterference()); } + + // Get the first pair of interfering segments, or a noninterfering result. + // This initializes the firstInterference_ cache. + const InterferenceResult &firstInterference(); + + // Treat the result as an iterator and advance to the next interfering pair + // of segments. Visiting each unique interfering pairs means that the same + // VirtReg or LiveUnion segment may be visited multiple times. + bool nextInterference(InterferenceResult &IR) const; + + // Count the virtual registers in this union that interfere with this + // query's live virtual register, up to maxInterferingRegs. + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + + // Was this virtual register visited during collectInterferingVRegs? + bool isSeenInterference(LiveInterval *VReg) const; + + // Did collectInterferingVRegs collect all interferences? + bool seenAllInterferences() const { return SeenAllInterferences; } + + // Did collectInterferingVRegs encounter an unspillable vreg? + bool seenUnspillableVReg() const { return SeenUnspillableVReg; } + + // Vector generated by collectInterferingVRegs. + const SmallVectorImpl<LiveInterval*> &interferingVRegs() const { + return InterferingVRegs; + } + + /// checkLoopInterference - Return true if there is interference overlapping + /// Loop. 
+ bool checkLoopInterference(MachineLoopRange*); + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI); + private: + Query(const Query&); // DO NOT IMPLEMENT + void operator=(const Query&); // DO NOT IMPLEMENT + + // Private interface for queries + void findIntersection(InterferenceResult &IR) const; + }; +}; + +} // end namespace llvm + +#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION) diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp new file mode 100644 index 000000000000..3bbda1c2e609 --- /dev/null +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -0,0 +1,129 @@ +//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeEdit class represents changes done to a virtual register when it +// is spilled or split. +//===----------------------------------------------------------------------===// + +#include "LiveRangeEdit.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri, + LiveIntervals &lis, + VirtRegMap &vrm) { + const TargetRegisterClass *RC = mri.getRegClass(getReg()); + unsigned VReg = mri.createVirtualRegister(RC); + vrm.grow(); + vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg())); + LiveInterval &li = lis.getOrCreateInterval(VReg); + newRegs_.push_back(&li); + return li; +} + +void LiveRangeEdit::scanRemattable(LiveIntervals &lis, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + for (LiveInterval::vni_iterator I = parent_.vni_begin(), + E = parent_.vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + if (!DefMI) + continue; + if (tii.isTriviallyReMaterializable(DefMI, aa)) + remattable_.insert(VNI); + } + scannedRemattable_ = true; +} + +bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + if (!scannedRemattable_) + scanRemattable(lis, tii, aa); + return !remattable_.empty(); +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx, + LiveIntervals &lis) { + OrigIdx = OrigIdx.getUseIndex(); + UseIdx = UseIdx.getUseIndex(); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg()) + continue; + // Reserved registers are OK. + if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + continue; + // We don't want to move any defs. + if (MO.isDef()) + return false; + // We cannot depend on virtual registers in uselessRegs_. 
+ for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui) + if (uselessRegs_[ui]->reg == MO.getReg()) + return false; + + LiveInterval &li = lis.getInterval(MO.getReg()); + const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); + if (!OVNI) + continue; + if (OVNI != li.getVNInfoAt(UseIdx)) + return false; + } + return true; +} + +bool LiveRangeEdit::canRematerializeAt(Remat &RM, + SlotIndex UseIdx, + bool cheapAsAMove, + LiveIntervals &lis) { + assert(scannedRemattable_ && "Call anyRematerializable first"); + + // Use scanRemattable info. + if (!remattable_.count(RM.ParentVNI)) + return false; + + // No defining instruction. + RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def); + assert(RM.OrigMI && "Defining instruction for remattable value disappeared"); + + // If only cheap remats were requested, bail out early. + if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) + return false; + + // Verify that all used registers are available with the same values. + if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis)) + return false; + + return true; +} + +SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, + const Remat &RM, + LiveIntervals &lis, + const TargetInstrInfo &tii, + const TargetRegisterInfo &tri) { + assert(RM.OrigMI && "Invalid remat"); + tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + rematted_.insert(RM.ParentVNI); + return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); +} + diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h new file mode 100644 index 000000000000..73f69ed63983 --- /dev/null +++ b/lib/CodeGen/LiveRangeEdit.h @@ -0,0 +1,135 @@ +//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeEdit class represents changes done to a virtual register when it +// is spilled or split. +// +// The parent register is never changed. Instead, a number of new virtual +// registers are created and added to the newRegs vector. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H +#define LLVM_CODEGEN_LIVERANGEEDIT_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class AliasAnalysis; +class LiveIntervals; +class MachineRegisterInfo; +class VirtRegMap; + +class LiveRangeEdit { + LiveInterval &parent_; + SmallVectorImpl<LiveInterval*> &newRegs_; + const SmallVectorImpl<LiveInterval*> &uselessRegs_; + + /// firstNew_ - Index of the first register added to newRegs_. + const unsigned firstNew_; + + /// scannedRemattable_ - true when remattable values have been identified. + bool scannedRemattable_; + + /// remattable_ - Values defined by remattable instructions as identified by + /// tii.isTriviallyReMaterializable(). + SmallPtrSet<VNInfo*,4> remattable_; + + /// rematted_ - Values that were actually rematted, and so need to have their + /// live range trimmed or entirely removed. + SmallPtrSet<VNInfo*,4> rematted_; + + /// scanRemattable - Identify the parent_ values that may rematerialize. 
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+ /// empty initially, any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ const SmallVectorImpl<LiveInterval*> &uselessRegs)
+ : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()), scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ /// create - Create a new register with the same class and stack slot as
+ /// parent.
+ LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+ /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&);
+
+ /// markRematerialized - Explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(VNInfo *ParentVNI) const { + return rematted_.count(ParentVNI); + } +}; + +} + +#endif diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index b5c385f77239..c75196a47210 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -26,7 +26,9 @@ using namespace llvm; char LiveStacks::ID = 0; INITIALIZE_PASS(LiveStacks, "livestacks", - "Live Stack Slot Analysis", false, false); + "Live Stack Slot Analysis", false, false) + +char &llvm::LiveStacksID = LiveStacks::ID; void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -48,6 +50,22 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) { return false; } +LiveInterval & +LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::iterator I = S2IMap.find(Slot); + if (I == S2IMap.end()) { + I = S2IMap.insert(I, std::make_pair(Slot, + LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F))); + S2RCMap.insert(std::make_pair(Slot, RC)); + } else { + // Use the largest common subclass register class. + const TargetRegisterClass *OldRC = S2RCMap[Slot]; + S2RCMap[Slot] = getCommonSubClass(OldRC, RC); + } + return I->second; +} + /// print - Implement the dump method. void LiveStacks::print(raw_ostream &OS, const Module*) const { diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 375307b973a9..dd43ef2530c1 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -42,8 +41,11 @@ using namespace llvm; char LiveVariables::ID = 0; -INITIALIZE_PASS(LiveVariables, "livevars", - "Live Variable Analysis", false, false); +INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", + "Live Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) +INITIALIZE_PASS_END(LiveVariables, "livevars", + "Live Variable Analysis", false, false) void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { @@ -79,13 +81,7 @@ void LiveVariables::VarInfo::dump() const { LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && "getVarInfo: not a virtual register!"); - RegIdx -= TargetRegisterInfo::FirstVirtualRegister; - if (RegIdx >= VirtRegInfo.size()) { - if (RegIdx >= 2*VirtRegInfo.size()) - VirtRegInfo.resize(RegIdx*2); - else - VirtRegInfo.resize(2*VirtRegInfo.size()); - } + VirtRegInfo.grow(RegIdx); return VirtRegInfo[RegIdx]; } @@ -498,9 +494,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); PHIJoins.clear(); - /// Get some space for a respectable number of registers. - VirtRegInfo.resize(64); - analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the @@ -628,19 +621,14 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. 
- for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) - for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j) - if (VirtRegInfo[i].Kills[j] == - MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister)) - VirtRegInfo[i] - .Kills[j]->addRegisterDead(i + - TargetRegisterInfo::FirstVirtualRegister, - TRI); + for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { + const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) + if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) + VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); else - VirtRegInfo[i] - .Kills[j]->addRegisterKilled(i + - TargetRegisterInfo::FirstVirtualRegister, - TRI); + VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI); + } // Check to make sure there are no unreachable blocks in the MC CFG for the // function. If so, it is due to a bug in the instruction selector or some @@ -775,8 +763,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); // Update info for all live variables - for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, - E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); VarInfo &VI = getVarInfo(Reg); if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI)) VI.AliveBlocks.set(NumNew); diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 7e366f0ceec0..1318d6212497 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -9,7 +9,7 @@ // // This pass assigns local frame indices to stack slots relative to one another // and allocates additional base registers to access them when the target -// estimates the are likely to be out of range of stack pointer and frame +// estimates they are likely to be out of range of stack pointer and frame // pointer relative addressing. // //===----------------------------------------------------------------------===// @@ -34,7 +34,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" using namespace llvm; @@ -152,9 +152,9 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... 
MachineFrameInfo *MFI = Fn.getFrameInfo(); - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; unsigned MaxAlign = 0; @@ -227,27 +227,28 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineFrameInfo *MFI = Fn.getFrameInfo(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; - MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin(); + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Collect all of the instructions in the block that reference // a frame index. Also store the frame index referenced to ease later // lookup. (For any insn that has more than one FI reference, we arbitrarily // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; - // A base register definition is a register+offset pair. - SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; + // A base register definition is a register + offset pair. + SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; + // Debug value instructions can't be out of range, so they don't need // any updates. if (MI->isDebugValue()) continue; + // For now, allocate the base register(s) within the basic block // where they're used, and don't try to keep them around outside // of that. It may be beneficial to try sharing them more broadly @@ -268,11 +269,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { } } } + // Sort the frame references by local offset array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + MachineBasicBlock *Entry = Fn.begin(); - // Loop throught the frame references and allocate for them as necessary + // Loop through the frame references and allocate for them as necessary. for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { MachineBasicBlock::iterator I = FrameReferenceInsns[ref].getMachineInstr(); @@ -321,10 +324,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { DEBUG(dbgs() << " Materializing base register " << BaseReg << " at frame local offset " << LocalOffsets[FrameIdx] + InstrOffset << "\n"); + // Tell the target to insert the instruction to initialize // the base register. 
- TRI->materializeFrameBaseRegister(InsertionPt, BaseReg, - FrameIdx, InstrOffset); + // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); // The base register already includes any offset specified // by the instruction, so account for that so it doesn't get diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 50f3f672dced..ccbff0af5b2c 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -146,27 +147,46 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { return I; } +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { + while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) + ++I; + return I; +} + MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator I = end(); - while (I != begin() && (--I)->getDesc().isTerminator()) + while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) ; /*noop */ - if (I != end() && !I->getDesc().isTerminator()) ++I; + while (I != end() && !I->getDesc().isTerminator()) + ++I; return I; } -void MachineBasicBlock::dump() const { - print(dbgs()); +MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { + iterator B = begin(), I = end(); + while (I != B) { + --I; + if (I->isDebugValue()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { + // A block with a landing pad successor only has one other successor. 
+ if (succ_size() > 2) + return 0; + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isLandingPad()) + return *I; + return 0; } -static inline void OutputReg(raw_ostream &os, unsigned RegNo, - const TargetRegisterInfo *TRI = 0) { - if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) { - if (TRI) - os << " %" << TRI->get(RegNo).Name; - else - os << " %physreg" << RegNo; - } else - os << " %reg" << RegNo; +void MachineBasicBlock::dump() const { + print(dbgs()); } StringRef MachineBasicBlock::getName() const { @@ -176,7 +196,7 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } -void MachineBasicBlock::print(raw_ostream &OS) const { +void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -186,6 +206,9 @@ void MachineBasicBlock::print(raw_ostream &OS) const { if (Alignment) { OS << "Alignment " << Alignment << "\n"; } + if (Indexes) + OS << Indexes->getMBBStartIdx(this) << '\t'; + OS << "BB#" << getNumber() << ": "; const char *Comma = ""; @@ -198,28 +221,36 @@ void MachineBasicBlock::print(raw_ostream &OS) const { if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } OS << '\n'; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (!livein_empty()) { + if (Indexes) OS << '\t'; OS << " Live Ins:"; for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) - OutputReg(OS, *I, TRI); + OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } // Print the preds of this block according to the CFG. if (!pred_empty()) { + if (Indexes) OS << '\t'; OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) OS << " BB#" << (*PI)->getNumber(); OS << '\n'; } - + for (const_iterator I = begin(); I != end(); ++I) { + if (Indexes) { + if (Indexes->hasIndex(I)) + OS << Indexes->getInstructionIndex(I); + OS << '\t'; + } OS << '\t'; I->print(OS, &getParent()->getTarget()); } // Print the successors of this block according to the CFG. if (!succ_empty()) { + if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) OS << " BB#" << (*SI)->getNumber(); @@ -431,14 +462,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere - // We may need to update this's terminator, but we can't do that if AnalyzeBranch - // fails. If this uses a jump table, we won't touch it. + // We may need to update this's terminator, but we can't do that if + // AnalyzeBranch fails. If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) return NULL; + // Avoid bugpoint weirdness: A block may end with a conditional branch but + // jumps to the same MBB is either case. We have duplicate CFG edges in that + // case that we can't handle. Since this never happens in properly optimized + // code, just skip those edges. 
+ if (TBB && TBB == FBB) { + DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" + << getNumber() << '\n'); + return NULL; + } + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); DEBUG(dbgs() << "Splitting critical edge:" diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 272b54dea1fa..07a7d27b019f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -22,15 +22,18 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); -STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); +STATISTIC(NumPhysCSEs, + "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { class MachineCSE : public MachineFunctionPass { @@ -41,7 +44,9 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {} + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + initializeMachineCSEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -61,10 +66,13 @@ namespace { private: const unsigned LookAheadLimit; - typedef ScopedHashTableScope<MachineInstr*, unsigned, - MachineInstrExpressionTrait> ScopeType; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; + typedef ScopedHashTable<MachineInstr*, unsigned, + MachineInstrExpressionTrait, AllocatorTy> ScopedHTType; + typedef ScopedHTType::ScopeTy ScopeType; DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; - ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; + ScopedHTType VNT; SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; @@ -72,11 +80,11 @@ namespace { bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const ; - bool hasLivePhysRegDefUse(const MachineInstr *MI, - const MachineBasicBlock *MBB, - unsigned &PhysDef) const; - bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, - unsigned PhysDef) const; + bool hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs) const; + bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -91,8 +99,12 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; -INITIALIZE_PASS(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false); +INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } @@ -104,7 +116,7 @@ bool 
MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (!MRI->hasOneNonDBGUse(Reg)) // Only coalesce single use copies. This ensure the copy will be @@ -120,17 +132,12 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, continue; if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) continue; - const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); - if (!NewRC) + if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << *MI); + DEBUG(dbgs() << "*** to: " << *MI); MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); - if (NewRC != SRC) - MRI->setRegClass(SrcReg, NewRC); DefMI->eraseFromParent(); ++NumCoalesces; Changed = true; @@ -176,14 +183,14 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return false; } -/// hasLivePhysRegDefUse - Return true if the specified instruction read / write +/// hasLivePhysRegDefUses - Return true if the specified instruction read/write /// physical registers (except for dead defs of physical registers). It also /// returns the physical register def by reference if it's the only one and the /// instruction does not uses a physical register. -bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, - const MachineBasicBlock *MBB, - unsigned &PhysDef) const { - PhysDef = 0; +bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs) const { + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) @@ -193,35 +200,22 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isUse()) { - // Can't touch anything to read a physical register. - PhysDef = 0; - return true; - } - if (MO.isDead()) - // If the def is dead, it's ok. - continue; - // Ok, this is a physical register def that's not marked "dead". That's + // If the def is dead, it's ok. But the def may not marked "dead". That's // common since this pass is run before livevariables. We can scan // forward a few instructions and check if it is obviously dead. - if (PhysDef) { - // Multiple physical register defs. These are rare, forget about it. - PhysDef = 0; - return true; - } - PhysDef = Reg; + if (MO.isDef() && + (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) + continue; + PhysRefs.insert(Reg); + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + PhysRefs.insert(*Alias); } - if (PhysDef) { - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); - if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) - return true; - } - return false; + return !PhysRefs.empty(); } -bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, - unsigned PhysDef) const { +bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. 
MachineBasicBlock *MBB = MI->getParent(); @@ -237,8 +231,17 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, if (I == E) return true; - if (I->modifiesRegister(PhysDef, TRI)) - return false; + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + continue; + if (PhysRefs.count(MOReg)) + return false; + } --LookAheadLeft; ++I; @@ -259,7 +262,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Ignore stuff that we obviously can't move. const TargetInstrDesc &TID = MI->getDesc(); if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) + MI->hasUnmodeledSideEffects()) return false; if (TID.mayLoad()) { @@ -281,14 +284,13 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. - // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an - // immediate predecessor. We don't want to increase register pressure and end up - // causing other computation to be spilled. + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in + // an immediate predecessor. We don't want to increase register pressure and + // end up causing other computation to be spilled. if (MI->getDesc().isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); - if (CSBB != BB && - find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) + if (CSBB != BB && !CSBB->isSuccessor(BB)) return false; } @@ -297,7 +299,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, bool HasVRegUse = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() && + if (MO.isReg() && MO.isUse() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; @@ -359,7 +361,6 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!isCSECandidate(MI)) continue; - bool DefPhys = false; bool FoundCSE = VNT.count(MI); if (!FoundCSE) { // Look for trivial copy coalescing opportunities. @@ -370,24 +371,37 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { FoundCSE = VNT.count(MI); } } - // FIXME: commute commutable instructions? - // If the instruction defines a physical register and the value *may* be + // Commute commutable instructions. + bool Commuted = false; + if (!FoundCSE && MI->getDesc().isCommutable()) { + MachineInstr *NewMI = TII->commuteInstruction(MI); + if (NewMI) { + Commuted = true; + FoundCSE = VNT.count(NewMI); + if (NewMI != MI) + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + else if (!FoundCSE) + // MI was changed but it didn't help, commute it back! + (void)TII->commuteInstruction(MI); + } + } + + // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. - unsigned PhysDef = 0; - if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { + // It's also not safe if the instruction uses physical registers. + SmallSet<unsigned,8> PhysRefs; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { FoundCSE = false; // ... 
Unless the CS is local and it also defines the physical register - // which is not clobbered in between. - if (PhysDef) { - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefReaches(CSMI, MI, PhysDef)) { - FoundCSE = true; - DefPhys = true; - } - } + // which is not clobbered in between and the physical register uses + // were not clobbered. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + FoundCSE = true; } if (!FoundCSE) { @@ -432,8 +446,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { } MI->eraseFromParent(); ++NumCSEs; - if (DefPhys) + if (!PhysRefs.empty()) ++NumPhysCSEs; + if (Commuted) + ++NumCommutes; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 3c674789244a..04c8ecbf9bdc 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -25,7 +25,7 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); char MachineDominatorTree::ID = 0; INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", - "MachineDominator Tree Construction", true, true); + "MachineDominator Tree Construction", true, true) char &llvm::MachineDominatorsID = MachineDominatorTree::ID; @@ -42,6 +42,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { MachineDominatorTree::MachineDominatorTree() : MachineFunctionPass(ID) { + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); DT = new DominatorTreeBase<MachineBasicBlock>(false); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 017170076ceb..85532407ca43 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -33,7 +33,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/GraphWriter.h" @@ -52,14 +52,15 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { } MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) { + unsigned FunctionNum, MachineModuleInfo &mmi, + GCModuleInfo* gmi) + : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); else RegInfo = 0; MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo()); + FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); if (Fn->hasFnAttr(Attribute::StackAlignment)) FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); @@ -190,20 +191,21 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { } MachineMemOperand * -MachineFunction::getMachineMemOperand(const Value *v, unsigned f, - int64_t o, uint64_t s, - unsigned base_alignment) { - return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment); +MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, + uint64_t s, unsigned base_alignment, + const MDNode *TBAAInfo) { + return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, + TBAAInfo); } 
MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { return new (Allocator) - MachineMemOperand(MMO->getValue(), MMO->getFlags(), - int64_t(uint64_t(MMO->getOffset()) + - uint64_t(Offset)), - Size, MMO->getBaseAlignment()); + MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MMO->getOffset()+Offset), + MMO->getFlags(), Size, + MMO->getBaseAlignment(), 0); } MachineInstr::mmo_iterator @@ -231,10 +233,10 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, else { // Clone the MMO and unset the store flag. MachineMemOperand *JustLoad = - getMachineMemOperand((*I)->getValue(), + getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOStore, - (*I)->getOffset(), (*I)->getSize(), - (*I)->getBaseAlignment()); + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getTBAAInfo()); Result[Index] = JustLoad; } ++Index; @@ -263,10 +265,10 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, else { // Clone the MMO and unset the load flag. MachineMemOperand *JustStore = - getMachineMemOperand((*I)->getValue(), + getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOLoad, - (*I)->getOffset(), (*I)->getSize(), - (*I)->getBaseAlignment()); + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getTBAAInfo()); Result[Index] = JustStore; } ++Index; @@ -279,7 +281,7 @@ void MachineFunction::dump() const { print(dbgs()); } -void MachineFunction::print(raw_ostream &OS) const { +void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << "# Machine code for function " << Fn->getName() << ":\n"; // Print Frame Information @@ -328,7 +330,7 @@ void MachineFunction::print(raw_ostream &OS) const { for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; - BB->print(OS); + BB->print(OS, Indexes); } OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; @@ -346,17 +348,15 @@ namespace llvm { std::string getNodeLabel(const MachineBasicBlock *Node, const MachineFunction *Graph) { - if (isSimple () && Node->getBasicBlock() && - !Node->getBasicBlock()->getName().empty()) - return Node->getBasicBlock()->getNameStr() + ":"; - std::string OutStr; { raw_string_ostream OSS(OutStr); - - if (isSimple()) - OSS << Node->getNumber() << ':'; - else + + if (isSimple()) { + OSS << "BB#" << Node->getNumber(); + if (const BasicBlock *BB = Node->getBasicBlock()) + OSS << ": " << BB->getName(); + } else Node->print(OSS); } @@ -396,7 +396,8 @@ void MachineFunction::viewCFGOnly() const /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, - const TargetRegisterClass *RC) { + const TargetRegisterClass *RC, + DebugLoc DL) { MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { @@ -405,6 +406,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, } VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(PReg, VReg); + MRI.addLiveInLoc(VReg, DL); return VReg; } @@ -426,6 +428,13 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, return Ctx.GetOrCreateSymbol(Name.str()); } +/// getPICBaseSymbol - Return a function-local symbol to represent the PIC +/// base. 
+MCSymbol *MachineFunction::getPICBaseSymbol() const { + const MCAsmInfo &MAI = *Target.getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(getFunctionNumber())+"$pb"); +} //===----------------------------------------------------------------------===// // MachineFrameInfo implementation @@ -485,7 +494,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; - const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); + const TargetFrameLowering *FI = MF.getTarget().getFrameLowering(); int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); OS << "Frame Objects:\n"; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 4f84b952e061..054c750c9f2b 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -12,22 +12,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" using namespace llvm; -// Register this pass with PassInfo directly to avoid having to define -// a default constructor. -static PassInfo -X("Machine Function Analysis", "machine-function-analysis", - &MachineFunctionAnalysis::ID, 0, - /*CFGOnly=*/false, /*is_analysis=*/true); - char MachineFunctionAnalysis::ID = 0; MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, CodeGenOpt::Level OL) : FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineFunctionAnalysis::~MachineFunctionAnalysis() { @@ -52,7 +47,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis<MachineModuleInfo>()); + getAnalysis<MachineModuleInfo>(), + getAnalysisIfAvailable<GCModuleInfo>()); return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 446e461d5460..aa9ea61acec7 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -102,13 +102,13 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { RemoveRegOperandFromRegInfo(); - Contents.Reg.RegNo = Reg; + SmallContents.RegNo = Reg; AddRegOperandToRegInfo(&MF->getRegInfo()); return; } // Otherwise, just change the register, no problem. :) - Contents.Reg.RegNo = Reg; + SmallContents.RegNo = Reg; } void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, @@ -159,7 +159,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, } else { // Otherwise, change this to a register and set the reg#. OpKind = MO_Register; - Contents.Reg.RegNo = Reg; + SmallContents.RegNo = Reg; // If this operand is embedded in a function, add the operand to the // register's use/def list. @@ -227,24 +227,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (const MachineBasicBlock *MBB = MI->getParent()) if (const MachineFunction *MF = MBB->getParent()) TM = &MF->getTarget(); + const TargetRegisterInfo *TRI = TM ? 
TM->getRegisterInfo() : 0; switch (getType()) { case MachineOperand::MO_Register: - if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) { - OS << "%reg" << getReg(); - } else { - if (TM) - OS << "%" << TM->getRegisterInfo()->get(getReg()).Name; - else - OS << "%physreg" << getReg(); - } - - if (getSubReg() != 0) { - if (TM) - OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg()); - else - OS << ':' << getSubReg(); - } + OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || isEarlyClobber()) { @@ -335,10 +322,45 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { // MachineMemOperand Implementation //===----------------------------------------------------------------------===// -MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, - int64_t o, uint64_t s, unsigned int a) - : Offset(o), Size(s), V(v), - Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { +/// getAddrSpace - Return the LLVM IR address space number that this pointer +/// points into. +unsigned MachinePointerInfo::getAddrSpace() const { + if (V == 0) return 0; + return cast<PointerType>(V->getType())->getAddressSpace(); +} + +/// getConstantPool - Return a MachinePointerInfo record that refers to the +/// constant pool. +MachinePointerInfo MachinePointerInfo::getConstantPool() { + return MachinePointerInfo(PseudoSourceValue::getConstantPool()); +} + +/// getFixedStack - Return a MachinePointerInfo record that refers to the +/// the specified FrameIndex. +MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) { + return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset); +} + +MachinePointerInfo MachinePointerInfo::getJumpTable() { + return MachinePointerInfo(PseudoSourceValue::getJumpTable()); +} + +MachinePointerInfo MachinePointerInfo::getGOT() { + return MachinePointerInfo(PseudoSourceValue::getGOT()); +} + +MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { + return MachinePointerInfo(PseudoSourceValue::getStack(), Offset); +} + +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, + uint64_t s, unsigned int a, + const MDNode *TBAAInfo) + : PtrInfo(ptrinfo), Size(s), + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), + TBAAInfo(TBAAInfo) { + assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && + "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -346,9 +368,9 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, /// Profile - Gather unique data for the object. /// void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Offset); + ID.AddInteger(getOffset()); ID.AddInteger(Size); - ID.AddPointer(V); + ID.AddPointer(getValue()); ID.AddInteger(Flags); } @@ -364,8 +386,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. - V = MMO->getValue(); - Offset = MMO->getOffset(); + PtrInfo = MMO->PtrInfo; } } @@ -410,6 +431,16 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { MMO.getBaseAlignment() != MMO.getSize()) OS << "(align=" << MMO.getAlignment() << ")"; + // Print TBAA info. 
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { + OS << "(tbaa="; + if (TBAAInfo->getNumOperands() > 0) + WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false); + else + OS << "<unknown>"; + OS << ")"; + } + return OS; } @@ -782,6 +813,14 @@ unsigned MachineInstr::getNumExplicitOperands() const { return NumOperands; } +bool MachineInstr::isStackAligningInlineAsm() const { + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + return true; + } + return false; +} /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens @@ -881,14 +920,15 @@ int MachineInstr::findFirstPredOperandIdx() const { bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { if (isInlineAsm()) { - assert(DefOpIdx >= 3); + assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) return false; // Determine the actual operand index that corresponds to this index. unsigned DefNo = 0; unsigned DefPart = 0; - for (unsigned i = 2, e = getNumOperands(); i < e; ) { + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); + i < e; ) { const MachineOperand &FMO = getOperand(i); // After the normal asm operands there may be additional imp-def regs. if (!FMO.isImm()) @@ -903,7 +943,8 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } ++DefNo; } - for (unsigned i = 2, e = getNumOperands(); i != e; ++i) { + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); + i != e; ++i) { const MachineOperand &FMO = getOperand(i); if (!FMO.isImm()) continue; @@ -946,7 +987,8 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { // Find the flag operand corresponding to UseOpIdx unsigned FlagIdx, NumOps=0; - for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + for (FlagIdx = InlineAsm::MIOp_FirstOperand; + FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { const MachineOperand &UFMO = getOperand(FlagIdx); // After the normal asm operands there may be additional imp-def regs. if (!UFMO.isImm()) @@ -964,9 +1006,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { if (!DefOpIdx) return true; - unsigned DefIdx = 2; + unsigned DefIdx = InlineAsm::MIOp_FirstOperand; // Remember to adjust the index. First operand is asm string, second is - // the AlignStack bit, then there is a flag for each. + // the HasSideEffects and AlignStack bits, then there is a flag for each. while (DefNo) { const MachineOperand &FMO = getOperand(DefIdx); assert(FMO.isImm()); @@ -1071,7 +1113,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, SawStore = true; return false; } - if (TID->isTerminator() || TID->hasUnmodeledSideEffects()) + + if (isLabel() || isDebugValue() || + TID->isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. 
If so, we have to guarantee that the @@ -1122,7 +1166,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { if (!TID->mayStore() && !TID->mayLoad() && !TID->isCall() && - !TID->hasUnmodeledSideEffects()) + !hasUnmodeledSideEffects()) return false; // Otherwise, if the instruction has no memory reference information, @@ -1166,7 +1210,9 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if (PSV->isConstant(MFI)) continue; // If we have an AliasAnalysis, ask it whether the memory is constant. - if (AA && AA->pointsToConstantMemory(V)) + if (AA && AA->pointsToConstantMemory( + AliasAnalysis::Location(V, (*I)->getSize(), + (*I)->getTBAAInfo()))) continue; } @@ -1194,6 +1240,18 @@ unsigned MachineInstr::isConstantValuePHI() const { return Reg; } +bool MachineInstr::hasUnmodeledSideEffects() const { + if (getDesc().hasUnmodeledSideEffects()) + return true; + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + return true; + } + + return false; +} + /// allDefsAreDead - Return true if all the defs of this instruction are dead. /// bool MachineInstr::allDefsAreDead() const { @@ -1207,6 +1265,17 @@ bool MachineInstr::allDefsAreDead() const { return true; } +/// copyImplicitOps - Copy implicit register operands from specified +/// instruction to this instruction. +void MachineInstr::copyImplicitOps(const MachineInstr *MI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + addOperand(MO); + } +} + void MachineInstr::dump() const { dbgs() << " " << *this; } @@ -1257,7 +1326,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (StartOp != 0) OS << ", "; getOperand(StartOp).print(OS, TM); unsigned Reg = getOperand(StartOp).getReg(); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); } @@ -1270,11 +1339,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // Print the rest of the operands. bool OmittedAnyCallClobbers = false; bool FirstOp = true; + + if (isInlineAsm()) { + // Print asm string. + OS << " "; + getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + + // Print HasSideEffects, IsAlignStack + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + OS << " [alignstack]"; + + StartOp = InlineAsm::MIOp_FirstOperand; + FirstOp = false; + } + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.getReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegs.push_back(MO.getReg()); // Omit call-clobbered registers which aren't used anywhere. 
This makes @@ -1284,7 +1370,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (MF && getDesc().isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; @@ -1348,14 +1434,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << RC->getName() << ":%reg" << VirtRegs[i]; + OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { ++j; continue; } if (VirtRegs[i] != VirtRegs[j]) - OS << "," << VirtRegs[j]; + OS << "," << PrintReg(VirtRegs[j]); VirtRegs.erase(VirtRegs.begin()+j); } } @@ -1533,8 +1619,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { switch (MO.getType()) { default: break; case MachineOperand::MO_Register: - if (MO.isDef() && MO.getReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. Key |= MO.getReg(); break; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 1a74b747e9f2..443fc2d97bdf 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,8 +28,10 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" @@ -40,8 +42,14 @@ using namespace llvm; -STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); -STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); +STATISTIC(NumHoisted, + "Number of machine instructions hoisted out of loops"); +STATISTIC(NumLowRP, + "Number of instructions hoisted in low reg pressure situation"); +STATISTIC(NumHighLatency, + "Number of high latency instructions hoisted"); +STATISTIC(NumCSEed, + "Number of hoisted machine instructions CSEed"); STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); @@ -51,9 +59,11 @@ namespace { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetLowering *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; - MachineRegisterInfo *RegInfo; + MachineRegisterInfo *MRI; + const InstrItineraryData *InstrItins; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. @@ -68,23 +78,37 @@ namespace { BitVector AllocatableSet; + // Track 'estimated' register pressure. + SmallSet<unsigned, 32> RegSeen; + SmallVector<unsigned, 8> RegPressure; + + // Register pressure "limit" per register class. If the pressure + // is higher than the limit, then it's considered high. + SmallVector<unsigned, 8> RegLimit; + + // Register pressure on path leading from loop preheader to current BB. 
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; + // For each opcode, keep a list of potential CSE instructions. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; public: static char ID; // Pass identification, replacement for typeid MachineLICM() : - MachineFunctionPass(ID), PreRegAlloc(true) {} + MachineFunctionPass(ID), PreRegAlloc(true) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } explicit MachineLICM(bool PreRA) : - MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + MachineFunctionPass(ID), PreRegAlloc(PreRA) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); const char *getPassName() const { return "Machine Instruction LICM"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<AliasAnalysis>(); @@ -94,6 +118,13 @@ namespace { } virtual void releaseMemory() { + RegSeen.clear(); + RegPressure.clear(); + RegLimit.clear(); + BackTrace.clear(); + for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator + CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) + CI->second.clear(); CSEMap.clear(); } @@ -138,6 +169,24 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); + /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' + /// and an use in the current loop, return true if the target considered + /// it 'high'. + bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, + unsigned Reg) const; + + bool IsCheapInstruction(MachineInstr &MI) const; + + /// CanCauseHighRegPressure - Visit BBs from header to current BB, + /// check if hoisting an instruction of the given cost matrix can cause high + /// register pressure. + bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); + + /// UpdateBackTraceRegPressure - Traverse the back trace from header to + /// the current block and update their register pressures to reflect the + /// effect of hoisting MI from the current block to the preheader. + void UpdateBackTraceRegPressure(const MachineInstr *MI); + /// IsProfitableToHoist - Return true if it is potentially profitable to /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); @@ -148,11 +197,16 @@ namespace { /// visit definitions before uses, allowing us to hoist a loop body in one /// pass without iteration. /// - void HoistRegion(MachineDomTreeNode *N); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); + + /// InitRegPressure - Find all virtual register references that are liveout + /// of the preheader to initialize the starting "register pressure". Note + /// this does not count live through (livein but not used) registers. + void InitRegPressure(MachineBasicBlock *BB); - /// isLoadFromConstantMemory - Return true if the given instruction is a - /// load from constant memory. - bool isLoadFromConstantMemory(MachineInstr *MI); + /// UpdateRegPressure - Update estimate of register pressure after the + /// specified instruction. + void UpdateRegPressure(const MachineInstr *MI); /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable @@ -174,8 +228,8 @@ namespace { /// Hoist - When an instruction is found to only use loop invariant operands /// that is safe to hoist, this instruction is called to do the dirty work. 
- /// - void Hoist(MachineInstr *MI); + /// It returns true if the instruction is hoisted. + bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); /// InitCSEMap - Initialize the CSE map with instructions that are in the /// current loop preheader that may become duplicates of instructions that @@ -189,8 +243,13 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; -INITIALIZE_PASS(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false); +INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false) FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { return new MachineLICM(PreRegAlloc); @@ -212,18 +271,32 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (PreRegAlloc) - DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else - DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); + TLI = TM->getTargetLowering(); TRI = TM->getRegisterInfo(); MFI = MF.getFrameInfo(); - RegInfo = &MF.getRegInfo(); + MRI = &MF.getRegInfo(); + InstrItins = TM->getInstrItineraryData(); AllocatableSet = TRI->getAllocatableSet(MF); + if (PreRegAlloc) { + // Estimate register pressure during pre-regalloc pass. + unsigned NumRC = TRI->getNumRegClasses(); + RegPressure.resize(NumRC); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + RegLimit.resize(NumRC); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); + } + // Get our Loop information... MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); @@ -248,7 +321,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); FirstInLoop = true; - HoistRegion(N); + HoistRegion(N, true); CSEMap.clear(); } } @@ -474,17 +547,33 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. /// -void MachineLICM::HoistRegion(MachineDomTreeNode *N) { +void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { assert(N != 0 && "Null dominator tree node?"); MachineBasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + if (IsHeader) { + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); + } + + // Remember livein register pressure. 
+ BackTrace.push_back(RegPressure); + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; - Hoist(&*MII); + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); MII = NextMII; } @@ -496,6 +585,99 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { for (unsigned I = 0, E = Children.size(); I != E; ++I) HoistRegion(Children[I]); } + + BackTrace.pop_back(); +} + +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + +/// InitRegPressure - Find all virtual register references that are liveout of +/// the preheader to initialize the starting "register pressure". Note this +/// does not count live through (livein but not used) registers. +void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { + std::fill(RegPressure.begin(), RegPressure.end(), 0); + + // If the preheader has only a single predecessor and it ends with a + // fallthrough or an unconditional branch, then scan its predecessor for live + // defs as well. This happens whenever the preheader is created by splitting + // the critical edge from the loop predecessor to the loop header. + if (BB->pred_size() == 1) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) + InitRegPressure(*BB->pred_begin()); + } + + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); + MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (MO.isDef()) + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill) + // Haven't seen this, it must be a livein. + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + else if (!isNew && isKill) + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + } +} + +/// UpdateRegPressure - Update estimate of register pressure after the +/// specified instruction. 
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + if (MO.isDef()) + Defs.push_back(Reg); + else if (!isNew && isOperandKill(MO, MRI)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + + if (RCCost > RegPressure[RCId]) + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= RCCost; + } + } + + while (!Defs.empty()) { + unsigned Reg = Defs.pop_back_val(); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] += RCCost; + } } /// IsLICMCandidate - Returns true if the instruction may be a suitable @@ -535,14 +717,14 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!RegInfo->def_empty(Reg)) + if (!MRI->def_empty(Reg)) return false; if (AllocatableSet.test(Reg)) return false; // Check for a def among the register's aliases too. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; - if (!RegInfo->def_empty(AliasReg)) + if (!MRI->def_empty(AliasReg)) return false; if (AllocatableSet.test(AliasReg)) return false; @@ -562,12 +744,12 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (!MO.isUse()) continue; - assert(RegInfo->getVRegDef(Reg) && + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); // If the loop contains the definition of an operand, then the instruction // isn't loop invariant. - if (CurLoop->contains(RegInfo->getVRegDef(Reg))) + if (CurLoop->contains(MRI->getVRegDef(Reg))) return false; } @@ -577,9 +759,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { /// HasPHIUses - Return true if the specified register has any PHI use. -static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { - for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), - UE = RegInfo->use_end(); UI != UE; ++UI) { +static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; if (UseMI->isPHI()) return true; @@ -587,37 +769,210 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { return false; } -/// isLoadFromConstantMemory - Return true if the given instruction is a -/// load from constant memory. Machine LICM will hoist these even if they are -/// not re-materializable. 
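// Hedged sketch of the pressure update rule implemented above: a def raises
// the pressure of its register class by the class cost, and a use that kills
// its register lowers it, never going below zero. The real code also tracks
// first-seen registers and applies defs after all uses; that is elided here,
// and every identifier below is illustrative only.
#include <algorithm>
#include <cstddef>
#include <vector>

struct OperandModel { unsigned RC; bool IsDef; bool IsKill; };

static void updatePressure(std::vector<int> &Pressure,
                           const std::vector<OperandModel> &Ops,
                           int ClassCost) {
  for (std::size_t i = 0; i != Ops.size(); ++i) {
    if (Ops[i].IsDef)
      Pressure[Ops[i].RC] += ClassCost;             // a new value becomes live
    else if (Ops[i].IsKill)
      Pressure[Ops[i].RC] =
          std::max(0, Pressure[Ops[i].RC] - ClassCost);  // last use frees it
  }
}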
-bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { - if (!MI->getDesc().mayLoad()) return false; - if (!MI->hasOneMemOperand()) return false; - MachineMemOperand *MMO = *MI->memoperands_begin(); - if (MMO->isVolatile()) return false; - if (!MMO->getValue()) return false; - const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); - if (PSV) { - MachineFunction &MF = *MI->getParent()->getParent(); - return PSV->isConstant(MF.getFrameInfo()); - } else { - return AA->pointsToConstantMemory(MMO->getValue()); + +/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' +/// and an use in the current loop, return true if the target considered +/// it 'high'. +bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, unsigned Reg) const { + if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) + return false; + + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->isCopyLike()) + continue; + if (!CurLoop->contains(UseMI->getParent())) + continue; + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) + return true; + } + + // Only look at the first in loop use. + break; + } + + return false; +} + +/// IsCheapInstruction - Return true if the instruction is marked "cheap" or +/// the operand latency between its def and a use is one or less. +bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { + if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + return true; + if (!InstrItins || InstrItins->isEmpty()) + return false; + + bool isCheap = false; + unsigned NumDefs = MI.getDesc().getNumDefs(); + for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &DefMO = MI.getOperand(i); + if (!DefMO.isReg() || !DefMO.isDef()) + continue; + --NumDefs; + unsigned Reg = DefMO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + if (!TII->hasLowDefLatency(InstrItins, &MI, i)) + return false; + isCheap = true; + } + + return isCheap; +} + +/// CanCauseHighRegPressure - Visit BBs from header to current BB, check +/// if hoisting an instruction of the given cost matrix can cause high +/// register pressure. +bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { + for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); + CI != CE; ++CI) { + if (CI->second <= 0) + continue; + + unsigned RCId = CI->first; + for (unsigned i = BackTrace.size(); i != 0; --i) { + SmallVector<unsigned, 8> &RP = BackTrace[i-1]; + if (RP[RCId] + CI->second >= RegLimit[RCId]) + return true; + } + } + + return false; +} + +/// UpdateBackTraceRegPressure - Traverse the back trace from header to the +/// current block and update their register pressures to reflect the effect +/// of hoisting MI from the current block to the preheader. +void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + // First compute the 'cost' of the instruction, i.e. its contribution + // to register pressure. 
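// Simplified sketch of the CanCauseHighRegPressure check defined above: walk
// the pressure snapshots recorded from the loop header down to the current
// block and test whether adding the candidate's cost would reach the class
// limit in any of them. Containers and names are stand-ins, not the LLVM ones.
#include <cstddef>
#include <map>
#include <vector>

static bool wouldExceedLimit(const std::vector<std::vector<int> > &BackTrace,
                             const std::map<unsigned, int> &Cost,
                             const std::vector<int> &RegLimit) {
  for (std::map<unsigned, int>::const_iterator CI = Cost.begin(),
         CE = Cost.end(); CI != CE; ++CI) {
    if (CI->second <= 0)
      continue;                                 // this class only gets cheaper
    for (std::size_t i = 0; i != BackTrace.size(); ++i)
      if (BackTrace[i][CI->first] + CI->second >= RegLimit[CI->first])
        return true;                   // some block on the path gets too hot
  }
  return false;
}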
+ DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + if (MO.isDef()) { + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second += RCCost; + else + Cost.insert(std::make_pair(RCId, RCCost)); + } else if (isOperandKill(MO, MRI)) { + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second -= RCCost; + else + Cost.insert(std::make_pair(RCId, -RCCost)); + } + } + + // Update register pressure of blocks from loop header to current block. + for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { + SmallVector<unsigned, 8> &RP = BackTrace[i]; + for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); + CI != CE; ++CI) { + unsigned RCId = CI->first; + RP[RCId] += CI->second; + } } } /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - // FIXME: For now, only hoist re-materilizable instructions. LICM will - // increase register pressure. We want to make sure it doesn't increase - // spilling. + if (MI.isImplicitDef()) + return true; + + // If the instruction is cheap, only hoist if it is re-materilizable. LICM + // will increase register pressure. It's probably not worth it if the + // instruction is cheap. // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting // these tend to help performance in low register pressure situation. The // trade off is it may cause spill in high pressure situation. It will end up // adding a store in the loop preheader. But the reload is no more expensive. // The side benefit is these loads are frequently CSE'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA)) { - if (!isLoadFromConstantMemory(&MI)) + if (IsCheapInstruction(MI)) { + if (!TII->isTriviallyReMaterializable(&MI, AA)) + return false; + } else { + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? 
+ DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + ++NumHighLatency; + return true; + } + + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second += RCCost; + else + Cost.insert(std::make_pair(RCId, RCCost)); + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second -= RCCost; + else + Cost.insert(std::make_pair(RCId, -RCCost)); + } + } + + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost)) { + ++NumLowRP; + return true; + } + + // High register pressure situation, only hoist if the instruction is going to + // be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) return false; } @@ -628,7 +983,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - if (HasPHIUses(MO.getReg(), RegInfo)) + if (HasPHIUses(MO.getReg(), MRI)) return false; } @@ -636,10 +991,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { } MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { + // Don't unfold simple loads. + if (MI->getDesc().canFoldAsLoad()) + return 0; + // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!isLoadFromConstantMemory(MI)) + if (!MI->isInvariantLoad(AA)) return 0; // Next determine the register class for a temporary register. @@ -654,7 +1013,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (TID.getNumDefs() != 1) return 0; const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. - unsigned Reg = RegInfo->createVirtualRegister(RC); + unsigned Reg = MRI->createVirtualRegister(RC); MachineFunction &MF = *MI->getParent()->getParent(); SmallVector<MachineInstr *, 2> NewMIs; @@ -678,6 +1037,10 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { NewMIs[1]->eraseFromParent(); return 0; } + + // Update register pressure for the unfolded instruction. + UpdateRegPressure(NewMIs[1]); + // Otherwise we successfully unfolded a load that we can hoist. 
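// A condensed, assumption-laden restatement of the profitability policy above,
// written as one small decision function. The field and function names are
// invented for illustration; the real pass derives these facts from the
// instruction, the itineraries, and the back-trace pressure estimate.
struct HoistCandidate {
  bool HighLatencyDef;        // a def feeds a long-latency use inside the loop
  bool Cheap;                 // "as cheap as a move" or low def latency
  bool Rematerializable;      // trivially re-materializable or invariant load
  bool RaisesPressureTooMuch; // result of the back-trace pressure check
};

static bool worthHoisting(const HoistCandidate &C) {
  if (C.HighLatencyDef)
    return true;                      // long latency: hoist even under pressure
  if (C.Cheap)
    return C.Rematerializable;        // cheap: only when it can be re-matted
  if (!C.RaisesPressureTooMuch)
    return true;                      // low pressure: hoisting is fine
  return C.Rematerializable;          // high pressure: only re-mattable values
}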
MI->eraseFromParent(); return NewMIs[0]; @@ -686,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; - // FIXME: For now, only hoist re-materilizable instructions. LICM will - // increase register pressure. We want to make sure it doesn't increase - // spilling. - if (TII->isTriviallyReMaterializable(MI, AA)) { - unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator - CI = CSEMap.find(Opcode); - if (CI != CSEMap.end()) - CI->second.push_back(MI); - else { - std::vector<const MachineInstr*> CSEMIs; - CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else { + std::vector<const MachineInstr*> CSEMIs; + CSEMIs.push_back(MI); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); } } } @@ -709,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->produceSameValue(MI, PrevMI)) + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) return PrevMI; } return 0; @@ -738,8 +1096,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, if (MO.isReg() && MO.isDef() && !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - RegInfo->clearKillFlags(Dup->getOperand(i).getReg()); + MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); + MRI->clearKillFlags(Dup->getOperand(i).getReg()); } } MI->eraseFromParent(); @@ -752,15 +1110,12 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// Hoist - When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// -void MachineLICM::Hoist(MachineInstr *MI) { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; - +bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. MI = ExtractHoistableLoad(MI); - if (!MI) return; + if (!MI) return false; } // Now move the instructions to the predecessor, inserting it before any @@ -791,13 +1146,16 @@ void MachineLICM::Hoist(MachineInstr *MI) { // Otherwise, splice the instruction to the preheader. Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); + // Update register pressure for BBs from header to this block. + UpdateBackTraceRegPressure(MI); + // Clear the kill flags of any register this instruction defines, // since they may need to be live throughout the entire loop // rather than just live for part of it. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && !MO.isDead()) - RegInfo->clearKillFlags(MO.getReg()); + MRI->clearKillFlags(MO.getReg()); } // Add to the CSE map. 
@@ -812,6 +1170,8 @@ void MachineLICM::Hoist(MachineInstr *MI) { ++NumHoisted; Changed = true; + + return true; } MachineBasicBlock *MachineLICM::getCurPreheader() { diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index bca4b0c28985..189cb2ba5d1d 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -30,8 +30,11 @@ TEMPLATE_INSTANTIATION(MLIB); } char MachineLoopInfo::ID = 0; -INITIALIZE_PASS(MachineLoopInfo, "machine-loops", - "Machine Natural Loop Construction", true, true); +INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp new file mode 100644 index 000000000000..17fe67f65045 --- /dev/null +++ b/lib/CodeGen/MachineLoopRanges.cpp @@ -0,0 +1,116 @@ +//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the MachineLoopRanges analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopRanges.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +char MachineLoopRanges::ID = 0; +INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges", + "Machine Loop Ranges", true, true) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges", + "Machine Loop Ranges", true, true) + +char &llvm::MachineLoopRangesID = MachineLoopRanges::ID; + +void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<SlotIndexes>(); + AU.addRequiredTransitive<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// runOnMachineFunction - Don't do much, loop ranges are computed on demand. +bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + Indexes = &getAnalysis<SlotIndexes>(); + return false; +} + +void MachineLoopRanges::releaseMemory() { + DeleteContainerSeconds(Cache); + Cache.clear(); +} + +MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) { + MachineLoopRange *&Range = Cache[Loop]; + if (!Range) + Range = new MachineLoopRange(Loop, Allocator, *Indexes); + return Range; +} + +/// Create a MachineLoopRange, only accessible to MachineLoopRanges. +MachineLoopRange::MachineLoopRange(const MachineLoop *loop, + MachineLoopRange::Allocator &alloc, + SlotIndexes &Indexes) + : Loop(loop), Intervals(alloc), Area(0) { + // Compute loop coverage. + for (MachineLoop::block_iterator I = Loop->block_begin(), + E = Loop->block_end(); I != E; ++I) { + const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I); + Intervals.insert(Range.first, Range.second, 1u); + Area += Range.first.distance(Range.second); + } +} + +/// overlaps - Return true if this loop overlaps the given range of machine +/// instructions. 
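// Standalone sketch of what the new MachineLoopRange constructor computes: the
// per-block [start, stop) index ranges of the loop and their total "area"
// (number of covered slots), plus the overlap query defined just below. Plain
// unsigned indices and a vector stand in for SlotIndex and the IntervalMap.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<unsigned, unsigned> > Blocks;   // [start, stop) per block
  Blocks.push_back(std::make_pair(16u, 48u));
  Blocks.push_back(std::make_pair(48u, 80u));

  unsigned Area = 0;
  for (std::size_t i = 0; i != Blocks.size(); ++i)
    Area += Blocks[i].second - Blocks[i].first;

  unsigned Start = 70, Stop = 90;                       // query range
  bool Overlaps = false;
  for (std::size_t i = 0; i != Blocks.size(); ++i)
    if (Start < Blocks[i].second && Blocks[i].first < Stop)
      Overlaps = true;

  std::printf("area = %u, overlaps [%u;%u) = %d\n",
              Area, Start, Stop, (int)Overlaps);
  return 0;
}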
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) { + Map::const_iterator I = Intervals.find(Start); + return I.valid() && Stop > I.start(); +} + +unsigned MachineLoopRange::getNumber() const { + return Loop->getHeader()->getNumber(); +} + +/// byNumber - Comparator for array_pod_sort that sorts a list of +/// MachineLoopRange pointers by number. +int MachineLoopRange::byNumber(const void *pa, const void *pb) { + const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa); + const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb); + unsigned na = a->getNumber(); + unsigned nb = b->getNumber(); + if (na < nb) + return -1; + if (na > nb) + return 1; + return 0; +} + +/// byAreaDesc - Comparator for array_pod_sort that sorts a list of +/// MachineLoopRange pointers by: +/// 1. Descending area. +/// 2. Ascending number. +int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) { + const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa); + const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb); + if (a->getArea() != b->getArea()) + return a->getArea() > b->getArea() ? -1 : 1; + return byNumber(pa, pb); +} + +void MachineLoopRange::print(raw_ostream &OS) const { + OS << "Loop#" << getNumber() << " ="; + for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I) + OS << " [" << I.start() << ';' << I.stop() << ')'; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) { + MLR.print(OS); + return OS; +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index b647a4dcc530..fadc594efcb2 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use TargetData's. INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false); + "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; // Out of line virtual method. @@ -41,30 +41,30 @@ class MMIAddrLabelMapCallbackPtr : CallbackVH { public: MMIAddrLabelMapCallbackPtr() : Map(0) {} MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} - + void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); } - + void setMap(MMIAddrLabelMap *map) { Map = map; } - + virtual void deleted(); virtual void allUsesReplacedWith(Value *V2); }; - + class MMIAddrLabelMap { MCContext &Context; struct AddrLabelSymEntry { /// Symbols - The symbols for the label. This is a pointer union that is /// either one symbol (the common case) or a list of symbols. PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols; - + Function *Fn; // The containing function of the BasicBlock. unsigned Index; // The index in BBCallbacks for the BasicBlock. }; - + DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; - + /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We /// use this so we get notified if a block is deleted or RAUWd. std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; @@ -76,23 +76,23 @@ class MMIAddrLabelMap { DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > DeletedAddrLabelsNeedingEmission; public: - + MMIAddrLabelMap(MCContext &context) : Context(context) {} ~MMIAddrLabelMap() { assert(DeletedAddrLabelsNeedingEmission.empty() && "Some labels for deleted blocks never got emitted"); - + // Deallocate any of the 'list of symbols' case. 
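// Illustrative use of the two-key comparator style added above (descending
// area, then ascending number) with a plain qsort. The element type here is a
// flat struct rather than MachineLoopRange pointers, so it is only an analogy.
#include <cstdio>
#include <cstdlib>

struct LoopDesc { unsigned Number; unsigned Area; };

static int byAreaDescThenNumber(const void *pa, const void *pb) {
  const LoopDesc *a = static_cast<const LoopDesc *>(pa);
  const LoopDesc *b = static_cast<const LoopDesc *>(pb);
  if (a->Area != b->Area)
    return a->Area > b->Area ? -1 : 1;        // bigger area sorts first
  if (a->Number != b->Number)
    return a->Number < b->Number ? -1 : 1;    // then by ascending number
  return 0;
}

int main() {
  LoopDesc Loops[3] = { {7, 32}, {3, 64}, {5, 64} };
  std::qsort(Loops, 3, sizeof(LoopDesc), byAreaDescThenNumber);
  for (int i = 0; i != 3; ++i)
    std::printf("Loop#%u area=%u\n", Loops[i].Number, Loops[i].Area);
  return 0;
}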
for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I) if (I->second.Symbols.is<std::vector<MCSymbol*>*>()) delete I->second.Symbols.get<std::vector<MCSymbol*>*>(); } - + MCSymbol *getAddrLabelSymbol(BasicBlock *BB); std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB); - void takeDeletedSymbolsForFunction(Function *F, + void takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result); void UpdateForDeletedBlock(BasicBlock *BB); @@ -104,7 +104,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { assert(BB->hasAddressTaken() && "Shouldn't get label for block without address taken"); AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; - + // If we already had an entry for this block, just return it. if (!Entry.Symbols.isNull()) { assert(BB->getParent() == Entry.Fn && "Parent changed"); @@ -112,7 +112,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { return Entry.Symbols.get<MCSymbol*>(); return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0]; } - + // Otherwise, this is a new entry, create a new symbol for it and add an // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. BBCallbacks.push_back(BB); @@ -129,9 +129,9 @@ MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { assert(BB->hasAddressTaken() && "Shouldn't get label for block without address taken"); AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; - + std::vector<MCSymbol*> Result; - + // If we already had an entry for this block, just return it. if (Entry.Symbols.isNull()) Result.push_back(getAddrLabelSymbol(BB)); @@ -152,7 +152,7 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { // If there are no entries for the function, just return. if (I == DeletedAddrLabelsNeedingEmission.end()) return; - + // Otherwise, take the list. std::swap(Result, I->second); DeletedAddrLabelsNeedingEmission.erase(I); @@ -175,7 +175,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) { if (Sym->isDefined()) return; - + // If the block is not yet defined, we need to emit it at the end of the // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list // for the containing Function. Since the block is being deleted, its @@ -187,7 +187,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { for (unsigned i = 0, e = Syms->size(); i != e; ++i) { MCSymbol *Sym = (*Syms)[i]; if (Sym->isDefined()) continue; // Ignore already emitted labels. - + // If the block is not yet defined, we need to emit it at the end of the // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list // for the containing Function. Since the block is being deleted, its @@ -195,7 +195,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // 'Entry'. DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); } - + // The entry is deleted, free the memory associated with the symbol list. delete Syms; } @@ -225,7 +225,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { SymList->push_back(PrevSym); NewEntry.Symbols = SymList; } - + std::vector<MCSymbol*> *SymList = NewEntry.Symbols.get<std::vector<MCSymbol*>*>(); @@ -234,7 +234,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { SymList->push_back(Sym); return; } - + // Otherwise, concatenate the list. 
std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>(); SymList->insert(SymList->end(), Syms->begin(), Syms->end()); @@ -253,10 +253,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// -MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) -: ImmutablePass(ID), Context(MAI), +MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, + const TargetAsmInfo *TAI) +: ImmutablePass(ID), Context(MAI, TAI), ObjFileMMI(0), - CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){ + CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false), + CallsExternalVAFunctionWithFloatingPointArguments(false) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); AddrLabelSymbols = 0; @@ -264,7 +267,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) } MachineModuleInfo::MachineModuleInfo() -: ImmutablePass(ID), Context(*(MCAsmInfo*)0) { +: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) { assert(0 && "This MachineModuleInfo constructor should never be called, MMI " "should always be explicitly constructed by LLVMTargetMachine"); abort(); @@ -272,7 +275,7 @@ MachineModuleInfo::MachineModuleInfo() MachineModuleInfo::~MachineModuleInfo() { delete ObjFileMMI; - + // FIXME: Why isn't doFinalization being called?? //assert(AddrLabelSymbols == 0 && "doFinalization not called"); delete AddrLabelSymbols; @@ -472,7 +475,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { (LPMap && (*LPMap)[BeginLabel] != 0)) && (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0))) continue; - + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); --j, --e; @@ -562,20 +565,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { // in the zero index. return 0; } - -namespace { - /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map - /// by source location, to avoid depending on the arbitrary order that - /// instruction selection visits variables in. 
- struct VariableDebugSorter { - bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A, - const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B) - const { - if (A.second.second.getLine() != B.second.second.getLine()) - return A.second.second.getLine() < B.second.second.getLine(); - if (A.second.second.getCol() != B.second.second.getCol()) - return A.second.second.getCol() < B.second.second.getCol(); - return false; - } - }; -} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 5d852f26beda..b3fb33736ffc 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -30,8 +30,9 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG - for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) - assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?"); + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && + "Vreg use list non-empty still?"); for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); @@ -44,20 +45,32 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { - unsigned VR = Reg; - Reg -= TargetRegisterInfo::FirstVirtualRegister; - assert(Reg < VRegInfo.size() && "Invalid vreg!"); const TargetRegisterClass *OldRC = VRegInfo[Reg].first; VRegInfo[Reg].first = RC; // Remove from old register class's vregs list. This may be slow but // fortunately this operation is rarely needed. std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR); + std::vector<unsigned>::iterator I = + std::find(VRegs.begin(), VRegs.end(), Reg); VRegs.erase(I); // Add to new register class's vregs list. - RegClass2VRegMap[RC->getID()].push_back(VR); + RegClass2VRegMap[RC->getID()].push_back(Reg); +} + +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, + const TargetRegisterClass *RC) { + const TargetRegisterClass *OldRC = getRegClass(Reg); + if (OldRC == RC) + return RC; + const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC); + if (!NewRC) + return 0; + if (NewRC != OldRC) + setRegClass(Reg, NewRC); + return NewRC; } /// createVirtualRegister - Create and return a new virtual register in the @@ -66,17 +79,22 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { unsigned MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ assert(RegClass && "Cannot create register without RegClass!"); + + // New virtual register number. + unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + // Add a reg, but keep track of whether the vector reallocated or not. - void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0]; - VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0)); - RegAllocHints.push_back(std::make_pair(0, 0)); + const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); + void *ArrayBase = getNumVirtRegs() == 0 ? 
0 : &VRegInfo[FirstVirtReg]; + VRegInfo.grow(Reg); + VRegInfo[Reg].first = RegClass; + RegAllocHints.grow(Reg); - if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1))) + if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) // The vector reallocated, handle this now. HandleVRegListReallocation(); - unsigned VR = getLastVirtReg(); - RegClass2VRegMap[RegClass->getID()].push_back(VR); - return VR; + RegClass2VRegMap[RegClass->getID()].push_back(Reg); + return Reg; } /// HandleVRegListReallocation - We just added a virtual register to the @@ -85,11 +103,12 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ void MachineRegisterInfo::HandleVRegListReallocation() { // The back pointers for the vreg lists point into the previous vector. // Update them to point to their correct slots. - for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) { - MachineOperand *List = VRegInfo[i].second; + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + MachineOperand *List = VRegInfo[Reg].second; if (!List) continue; // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[i].second; + List->Contents.Reg.Prev = &VRegInfo[Reg].second; } } @@ -112,8 +131,6 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { - assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && - "Invalid vreg!"); // Since we are in SSA form, we can use the first definition. if (!def_empty(Reg)) return &*def_begin(Reg); @@ -193,8 +210,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LiveIns.erase(LiveIns.begin() + i); --i; --e; } else { + DebugLoc DL; + // If there is a location for this live in then use it. + DenseMap<unsigned, DebugLoc>::iterator DLI = + LiveInLocs.find(LiveIns[i].second); + if (DLI != LiveInLocs.end()) + DL = DLI->second; + // Emit a copy. 
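// Two standalone sketches for the MachineRegisterInfo changes above; every
// identifier is illustrative and not the LLVM API itself.
//
// (1) constrainRegClass narrows a virtual register's class to one compatible
// with a new requirement. Modelling a register class as a set of registers,
// the "common subclass" behaves like an intersection, and the constraint
// fails when that intersection is empty.
#include <algorithm>
#include <iterator>
#include <set>

typedef std::set<unsigned> RegClassSet;

static RegClassSet constrain(const RegClassSet &Current,
                             const RegClassSet &Required) {
  RegClassSet Common;
  std::set_intersection(Current.begin(), Current.end(),
                        Required.begin(), Required.end(),
                        std::inserter(Common, Common.begin()));
  return Common;            // empty: the register cannot satisfy both classes
}

// (2) Virtual registers are now addressed via index2VirtReg/getNumVirtRegs
// instead of "FirstVirtualRegister + i" arithmetic. One plausible encoding,
// assumed here purely for illustration, tags the index with the top bit so a
// virtual register number can never collide with a small physical register.
static unsigned index2VirtReg(unsigned Index)   { return Index | (1u << 31); }
static unsigned virtReg2Index(unsigned Reg)     { return Reg & ~(1u << 31); }
static bool     isVirtualRegister(unsigned Reg) { return (Reg & (1u << 31)) != 0; }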
- BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), + BuildMI(*EntryMBB, EntryMBB->begin(), DL, TII.get(TargetOpcode::COPY), LiveIns[i].second) .addReg(LiveIns[i].first); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index c8f8fafe227e..8a93a24287b6 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -25,6 +25,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -34,27 +35,31 @@ using namespace llvm; static cl::opt<bool> SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), - cl::init(false), cl::Hidden); -static cl::opt<unsigned> -SplitLimit("split-limit", - cl::init(~0u), cl::Hidden); + cl::init(true), cl::Hidden); -STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumSplit, "Number of critical edges split"); +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); +STATISTIC(NumCoalesces, "Number of copies coalesced"); namespace { class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineRegisterInfo *RegInfo; // Machine register information + MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachineLoopInfo *LI; AliasAnalysis *AA; BitVector AllocatableSet; // Which physregs are allocatable? + // Remember which edges have been considered for breaking. + SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> + CEBCandidates; + public: static char ID; // Pass identification - MachineSinking() : MachineFunctionPass(ID) {} + MachineSinking() : MachineFunctionPass(ID) { + initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -67,43 +72,125 @@ namespace { AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); } + + virtual void releaseMemory() { + CEBCandidates.clear(); + } + private: bool ProcessBlock(MachineBasicBlock &MBB); - MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From, - MachineBasicBlock *To); + bool isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To); + MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To, + bool BreakPHIEdge); bool SinkInstruction(MachineInstr *MI, bool &SawStore); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, - MachineBasicBlock *DefMBB, bool &LocalUse) const; + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, bool &LocalUse) const; + bool PerformTrivialForwardCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; -INITIALIZE_PASS(MachineSinking, "machine-sink", - "Machine code sinking", false, false); +INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", + "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineSinking, "machine-sink", + "Machine code sinking", false, false) FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } +bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, + 
MachineBasicBlock *MBB) { + if (!MI->isCopy()) + return false; + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + !TargetRegisterInfo::isVirtualRegister(DstReg) || + !MRI->hasOneNonDBGUse(SrcReg)) + return false; + + const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); + const TargetRegisterClass *DRC = MRI->getRegClass(DstReg); + if (SRC != DRC) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isCopyLike()) + return false; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MRI->replaceRegWith(DstReg, SrcReg); + MI->eraseFromParent(); + ++NumCoalesces; + return true; +} + /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. If any use is in the /// definition block, then return false since it is never legal to move def /// after uses. -bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, - MachineBasicBlock *MBB, - MachineBasicBlock *DefMBB, - bool &LocalUse) const { +bool +MachineSinking::AllUsesDominatedByBlock(unsigned Reg, + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, + bool &LocalUse) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); + + if (MRI->use_nodbg_empty(Reg)) + return true; + // Ignoring debug uses is necessary so debug info doesn't affect the code. // This may leave a referencing dbg_value in the original block, before // the definition of the vreg. Dwarf generator handles this although the // user might not get the right info at runtime. + + // BreakPHIEdge is true if all the uses are in the successor MBB being sunken + // into and they are all PHI nodes. In this case, machine-sink must break + // the critical edge first. e.g. + // + // BB#1: derived from LLVM BB %bb4.preheader + // Predecessors according to CFG: BB#0 + // ... + // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead> + // ... + // JE_4 <BB#37>, %EFLAGS<imp-use> + // Successors according to CFG: BB#37 BB#2 + // + // BB#2: derived from LLVM BB %bb.nph + // Predecessors according to CFG: BB#0 BB#1 + // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1> + BreakPHIEdge = true; for (MachineRegisterInfo::use_nodbg_iterator - I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end(); + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; ++I) { - // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); + if (!(UseBlock == MBB && UseInst->isPHI() && + UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) { + BreakPHIEdge = false; + break; + } + } + if (BreakPHIEdge) + return true; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + // Determine the block of the use. + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. 
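// Hedged, toy model of the trivial forward coalescing added above: when a
// copy's source virtual register has exactly one (non-debug) use and the
// register classes match, later mentions of the destination can simply be
// renamed to the source and the copy deleted. Real machine IR is far richer;
// the string-based "instruction" below exists only for illustration.
#include <cstddef>
#include <string>
#include <vector>

struct InstModel { std::string Op; std::vector<std::string> Regs; };

static void coalesceCopy(std::vector<InstModel> &Code, std::size_t CopyIdx) {
  const std::string Dst = Code[CopyIdx].Regs[0];
  const std::string Src = Code[CopyIdx].Regs[1];
  for (std::size_t i = CopyIdx + 1; i != Code.size(); ++i)
    for (std::size_t r = 0; r != Code[i].Regs.size(); ++r)
      if (Code[i].Regs[r] == Dst)
        Code[i].Regs[r] = Src;                 // rename uses of dst to src
  Code.erase(Code.begin() + CopyIdx);          // the copy itself is now dead
}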
@@ -127,7 +214,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); - RegInfo = &MF.getRegInfo(); + MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); @@ -139,6 +226,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; // Process all basic blocks. + CEBCandidates.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); @@ -177,6 +265,9 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MI->isDebugValue()) continue; + if (PerformTrivialForwardCoalescing(MI, &MBB)) + continue; + if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; @@ -186,51 +277,92 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { return MadeChange; } -MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB) { +bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To) { + // FIXME: Need much better heuristics. + + // If the pass has already considered breaking this edge (during this pass + // through the function), then let's go ahead and break it. This means + // sinking multiple "cheap" instructions into the same block. + if (!CEBCandidates.insert(std::make_pair(From, To))) + return true; + + if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove()) + return true; + + // MI is cheap, we probably don't want to break the critical edge for it. + // However, if this would allow some definitions of its source operands + // to be sunk then it's probably worth it. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MRI->hasOneNonDBGUse(Reg)) + return true; + } + + return false; +} + +MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { + if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) + return 0; + // Avoid breaking back edge. From == To means backedge for single BB loop. - if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB) + if (!SplitEdges || FromBB == ToBB) + return 0; + + // Check for backedges of more "complex" loops. + if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && + LI->isLoopHeader(ToBB)) return 0; - // Check for more "complex" loops. - if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) || - !LI->isLoopHeader(ToBB)) { - // It's not always legal to break critical edges and sink the computation - // to the edge. - // - // BB#1: - // v1024 - // Beq BB#3 - // <fallthrough> - // BB#2: - // ... no uses of v1024 - // <fallthrough> - // BB#3: - // ... - // = v1024 - // - // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: - // - // BB#1: - // ... - // Bne BB#2 - // BB#4: - // v1024 = - // B BB#3 - // BB#2: - // ... no uses of v1024 - // <fallthrough> - // BB#3: - // ... - // = v1024 - // - // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 - // flow. We need to ensure the new basic block where the computation is - // sunk to dominates all the uses. 
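// Minimal sketch of the "critical edge" notion the comments above rely on: an
// edge From -> To is critical when From has several successors and To has
// several predecessors, so nothing can be placed on the edge without creating
// a new block. The three-block CFG below is invented for illustration.
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int> > succ(3);       // succ[b] = successors of BB#b
  succ[0].push_back(1); succ[0].push_back(2);   // BB#0 branches to BB#1 or BB#2
  succ[1].push_back(2);                         // BB#1 falls through to BB#2

  std::vector<int> npreds(3, 0);
  for (unsigned b = 0; b != succ.size(); ++b)
    for (unsigned i = 0; i != succ[b].size(); ++i)
      ++npreds[succ[b][i]];

  for (unsigned b = 0; b != succ.size(); ++b)
    for (unsigned i = 0; i != succ[b].size(); ++i) {
      int To = succ[b][i];
      if (succ[b].size() > 1 && npreds[To] > 1)
        std::printf("BB#%u -> BB#%d is a critical edge\n", b, To);
    }
  return 0;
}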
- // It's only legal to break critical edge and sink the computation to the - // new block if all the predecessors of "To", except for "From", are - // not dominated by "From". Given SSA property, this means these - // predecessors are dominated by "To". + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. + // It's only legal to break critical edge and sink the computation to the + // new block if all the predecessors of "To", except for "From", are + // not dominated by "From". Given SSA property, this means these + // predecessors are dominated by "To". + // + // There is no need to do this check if all the uses are PHI nodes. PHI + // sources are only defined on the specific predecessor edges. + if (!BreakPHIEdge) { for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(), E = ToBB->pred_end(); PI != E; ++PI) { if (*PI == FromBB) @@ -238,17 +370,23 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, if (!DT->dominates(ToBB, *PI)) return 0; } - - // FIXME: Determine if it's cost effective to break this edge. - return FromBB->SplitCriticalEdge(ToBB, this); } - return 0; + return FromBB->SplitCriticalEdge(ToBB, this); +} + +static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { + return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence(); } /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) + return false; + // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; @@ -269,6 +407,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // decide. MachineBasicBlock *SuccToSinkTo = 0; + bool BreakPHIEdge = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -281,7 +420,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!RegInfo->def_empty(Reg)) + if (!MRI->def_empty(Reg)) return false; if (AllocatableSet.test(Reg)) @@ -290,7 +429,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check for a def among the register's aliases too. 
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; - if (!RegInfo->def_empty(AliasReg)) + if (!MRI->def_empty(AliasReg)) return false; if (AllocatableSet.test(AliasReg)) @@ -305,7 +444,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MO.isUse()) continue; // If it's not safe to move defs of the register class, then abort. - if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) + if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return false; // FIXME: This picks a successor to sink into based on having one @@ -327,7 +466,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse)) + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, + BreakPHIEdge, LocalUse)) return false; continue; @@ -338,7 +478,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), E = ParentBlock->succ_end(); SI != E; ++SI) { bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) { + if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, + BreakPHIEdge, LocalUse)) { SuccToSinkTo = *SI; break; } @@ -384,7 +525,6 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. - // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. @@ -412,10 +552,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (!TryBreak) DEBUG(dbgs() << "Sinking along critical edge.\n"); else { - MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo); + MachineBasicBlock *NewSucc = + SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!NewSucc) { - DEBUG(dbgs() << - " *** PUNTING: Not legal or profitable to break critical edge\n"); + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); return false; } else { DEBUG(dbgs() << " *** Splitting critical edge:" @@ -424,10 +565,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); SuccToSinkTo = NewSucc; ++NumSplit; + BreakPHIEdge = false; } } } + if (BreakPHIEdge) { + // BreakPHIEdge is true if all the uses are in the successor MBB being + // sunken into and they are all PHI nodes. In this case, machine-sink must + // break the critical edge first. + MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, + SuccToSinkTo, BreakPHIEdge); + if (!NewSucc) { + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); + return false; + } + + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << ParentBlock->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); + SuccToSinkTo = NewSucc; + ++NumSplit; + } + // Determine where to insert into. Skip phi nodes. 
MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 1e88562935ea..7351119f4728 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -26,6 +26,7 @@ #include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -45,14 +46,16 @@ using namespace llvm; namespace { struct MachineVerifier { - MachineVerifier(Pass *pass) : + MachineVerifier(Pass *pass, const char *b) : PASS(pass), + Banner(b), OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; + const char *Banner; const char *const OutFileName; raw_ostream *OS; const MachineFunction *MF; @@ -71,6 +74,8 @@ namespace { RegVector regsDefined, regsDead, regsKilled; RegSet regsLiveInButUnused; + SlotIndex lastIndex; + // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); @@ -167,7 +172,9 @@ namespace { // Analysis information if available LiveVariables *LiveVars; - const LiveIntervals *LiveInts; + LiveIntervals *LiveInts; + LiveStacks *LiveStks; + SlotIndexes *Indexes; void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); @@ -193,9 +200,12 @@ namespace { struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid + const char *const Banner; - MachineVerifierPass() - : MachineFunctionPass(ID) {} + MachineVerifierPass(const char *b = 0) + : MachineFunctionPass(ID), Banner(b) { + initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -203,7 +213,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) { - MF.verify(this); + MF.verify(this, Banner); return false; } }; @@ -212,14 +222,15 @@ namespace { char MachineVerifierPass::ID = 0; INITIALIZE_PASS(MachineVerifierPass, "machineverifier", - "Verify generated machine code", false, false); + "Verify generated machine code", false, false) -FunctionPass *llvm::createMachineVerifierPass() { - return new MachineVerifierPass(); +FunctionPass *llvm::createMachineVerifierPass(const char *Banner) { + return new MachineVerifierPass(Banner); } -void MachineFunction::verify(Pass *p) const { - MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this)); +void MachineFunction::verify(Pass *p, const char *Banner) const { + MachineVerifier(p, Banner) + .runOnMachineFunction(const_cast<MachineFunction&>(*this)); } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { @@ -247,11 +258,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { LiveVars = NULL; LiveInts = NULL; + LiveStks = NULL; + Indexes = NULL; if (PASS) { LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); // We don't want to verify LiveVariables if LiveIntervals is available. 
if (!LiveInts) LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); + LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>(); + Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>(); } visitMachineFunctionBefore(); @@ -260,6 +275,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineBasicBlockBefore(MFI); for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + if (MBBI->getParent() != MFI) { + report("Bad instruction parent pointer", MFI); + *OS << "Instruction: " << *MBBI; + continue; + } visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); @@ -288,8 +308,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); *OS << '\n'; - if (!foundErrors++) - MF->print(*OS); + if (!foundErrors++) { + if (Banner) + *OS << "# " << Banner << '\n'; + MF->print(*OS, Indexes); + } *OS << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getFunction()->getNameStr() << "\n"; } @@ -299,13 +322,19 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { report(msg, MBB->getParent()); *OS << "- basic block: " << MBB->getName() << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")\n"; + << " (BB#" << MBB->getNumber() << ")"; + if (Indexes) + *OS << " [" << Indexes->getMBBStartIdx(MBB) + << ';' << Indexes->getMBBEndIdx(MBB) << ')'; + *OS << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); *OS << "- instruction: "; + if (Indexes && Indexes->hasIndex(MI)) + *OS << Indexes->getInstructionIndex(MI) << '\t'; MI->print(*OS, TM); } @@ -329,6 +358,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineFunctionBefore() { + lastIndex = SlotIndex(); regsReserved = TRI->getReservedRegs(*MF); // A sub-register of a reserved register is also reserved @@ -357,6 +387,16 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + // Count the number of landing pad successors. + SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if ((*I)->isLandingPad()) + LandingPadSuccs.insert(*I); + } + if (LandingPadSuccs.size() > 1) + report("MBB has more than one landing pad successor", MBB); + // Call AnalyzeBranch. If it succeeds, there several more conditions to check. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; @@ -372,14 +412,14 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actually fall // out the bottom of the function. - } else if (MBB->succ_empty()) { + } else if (MBB->succ_size() == LandingPadSuccs.size()) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actuall fall // out of the block. 
- } else if (MBB->succ_size() != 1) { + } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); - } else if (MBB->succ_begin()[0] != MBBI) { + } else if (!MBB->isSuccessor(MBBI)) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } @@ -394,10 +434,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && Cond.empty()) { // Block unconditionally branches somewhere. - if (MBB->succ_size() != 1) { + if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional branch but doesn't have " "exactly one CFG successor!", MBB); - } else if (MBB->succ_begin()[0] != TBB) { + } else if (!MBB->isSuccessor(TBB)) { report("MBB exits via unconditional branch but the CFG " "successor doesn't match the actual successor!", MBB); } @@ -487,6 +527,9 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsKilled.clear(); regsDefined.clear(); + + if (Indexes) + lastIndex = Indexes->getMBBStartIdx(MBB); } void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { @@ -525,6 +568,7 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const TargetInstrDesc &TI = MI->getDesc(); + const TargetOperandInfo &TOI = TI.OpInfo[MONum]; // The first TI.NumDefs operands must be explicit register defines if (MONum < TI.getNumDefs()) { @@ -535,9 +579,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); } else if (MONum < TI.getNumOperands()) { - if (MO->isReg()) { - if (MO->isDef()) - report("Explicit operand marked as def", MO, MONum); + // Don't check if it's the last operand in a variadic instruction. See, + // e.g., LDM_RET in the arm back end. + if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) { + if (MO->isDef() && !TOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } @@ -554,7 +600,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; // Check Live Variables. - if (MO->isUndef()) { + if (MI->isDebugValue()) { + // Liveness checks are not valid for debug values. + } else if (MO->isUndef()) { // An <undef> doesn't refer to any register, so just skip it. } else if (MO->isUse()) { regsLiveInButUnused.erase(Reg); @@ -566,7 +614,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { unsigned DefReg = MI->getOperand(defIdx).getReg(); if (Reg == DefReg) { isKill = true; - // ANd in that case an explicit kill flag is not allowed. + // And in that case an explicit kill flag is not allowed. if (MO->isKill()) report("Illegal kill flag on two-address instruction operand", MO, MONum); @@ -590,7 +638,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. 
- if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); @@ -598,8 +647,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("No live range at use", MO, MONum); *OS << UseIdx << " is not live in " << LI << '\n'; } - // TODO: Verify isKill == LI.killedAt. - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { report("Virtual register has no Live interval", MO, MONum); } } @@ -636,11 +690,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) { - assert(LR->valno && "NULL valno is not allowed"); - if (LR->valno->def != DefIdx) { + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << LR->valno->id << " is not defined at " + *OS << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; } } else { @@ -655,7 +709,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check register classes. 
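[Editorial aside] The liveness checks above and below rely on SlotIndex's per-instruction sub-slots (LOAD, USE, DEF and STORE in this era's SlotIndexes.h): getUseIndex()/getDefIndex() select the point within the same instruction at which uses are read and defs become live, and isUse()/isDef() test which sub-slot an index names, which is why early-clobber defs are expected at a USE slot further down. A compressed, hedged restatement of the invariant verified for a virtual-register use, with names as in the surrounding hunk:

  SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
  const LiveInterval &LI = LiveInts->getInterval(Reg);
  // 1) Reg must have a live range covering UseIdx, and
  // 2) a kill flag additionally promises the range ends at this instruction:
  //      MO->isKill()  implies  LI.killedAt(UseIdx.getDefIndex())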
if (MONum < TI.getNumOperands() && !MO->isImplicit()) { - const TargetOperandInfo &TOI = TI.OpInfo[MONum]; unsigned SubIdx = MO->getSubReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -706,6 +759,22 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("PHI operand is not in the CFG", MO, MONum); break; + case MachineOperand::MO_FrameIndex: + if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); + SlotIndex Idx = LiveInts->getInstructionIndex(MI); + if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + report("Instruction loads from dead spill slot", MO, MONum); + *OS << "Live stack: " << LI << '\n'; + } + if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + report("Instruction stores to dead spill slot", MO, MONum); + *OS << "Live stack: " << LI << '\n'; + } + } + break; + default: break; } @@ -717,12 +786,31 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { set_subtract(regsLive, regsKilled); regsKilled.clear(); set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); + + if (Indexes && Indexes->hasIndex(MI)) { + SlotIndex idx = Indexes->getInstructionIndex(MI); + if (!(idx > lastIndex)) { + report("Instruction index out of order", MI); + *OS << "Last instruction was at " << lastIndex << '\n'; + } + lastIndex = idx; + } } void MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { MBBInfoMap[MBB].regsLiveOut = regsLive; regsLive.clear(); + + if (Indexes) { + SlotIndex stop = Indexes->getMBBEndIdx(MBB); + if (!(stop > lastIndex)) { + report("Block ends before last instruction index", MBB); + *OS << "Block ends at " << stop + << " last instruction was at " << lastIndex << '\n'; + } + lastIndex = stop; + } } // Calculate the largest possible vregsPassed sets. These are the registers that @@ -854,8 +942,8 @@ void MachineVerifier::visitMachineFunctionAfter() { void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); - for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, - RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { @@ -865,13 +953,13 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MFI->getNumber())) { report("LiveVariables: Block missing from AliveBlocks", MFI); - *OS << "Virtual register %reg" << Reg + *OS << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MFI->getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", MFI); - *OS << "Virtual register %reg" << Reg + *OS << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } } @@ -884,14 +972,24 @@ void MachineVerifier::verifyLiveIntervals() { for (LiveIntervals::const_iterator LVI = LiveInts->begin(), LVE = LiveInts->end(); LVI != LVE; ++LVI) { const LiveInterval &LI = *LVI->second; + + // Spilling and splitting may leave unused registers around. Skip them. 
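[Editorial aside] The new MO_FrameIndex case cross-checks memory operands against LiveStacks: an instruction that may load must reference a spill slot whose stack interval is live at the instruction's use slot, and one that may store must reference a slot live at its def slot. Stripped of the diff markers, the added check amounts to:

  int FI = MO->getIndex();                         // stack (spill) slot operand
  if (LiveStks->hasInterval(FI) && !LiveInts->isNotInMIMap(MI)) {
    const LiveInterval &SI = LiveStks->getInterval(FI);
    SlotIndex Idx = LiveInts->getInstructionIndex(MI);
    bool loadOK  = SI.liveAt(Idx.getUseIndex());   // required when TI.mayLoad()
    bool storeOK = SI.liveAt(Idx.getDefIndex());   // required when TI.mayStore()
  }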
+ if (MRI->use_empty(LI.reg)) + continue; + + // Physical registers have much weirdness going on, mostly from coalescing. + // We should probably fix it, but for now just ignore them. + if (TargetRegisterInfo::isPhysicalRegister(LI.reg)) + continue; + assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I!=E; ++I) { VNInfo *VNI = *I; - const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def); + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (!DefLR) { + if (!DefVNI) { if (!VNI->isUnused()) { report("Valno not live at def and not marked unused", MF); *OS << "Valno #" << VNI->id << " in " << LI << '\n'; @@ -902,31 +1000,216 @@ void MachineVerifier::verifyLiveIntervals() { if (VNI->isUnused()) continue; - if (DefLR->valno != VNI) { + if (DefVNI != VNI) { report("Live range at def has different valno", MF); - DefLR->print(*OS); - *OS << " should use valno #" << VNI->id << " in " << LI << '\n'; + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; + continue; } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + continue; + } + + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() + << " in " << LI << '\n'; + } + } else { + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + } else if (!MI->modifiesRegister(LI.reg, TRI)) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } + + bool isEarlyClobber = false; + if (MI) { + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && + MOI->isEarlyClobber()) { + isEarlyClobber = true; + break; + } + } + } + + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
+ if (isEarlyClobber) { + if (!VNI->def.isUse()) { + report("Early clobber def must be at a USE slot", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + } + } else if (!VNI->def.isDef()) { + report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + } + } } for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const LiveRange &LR = *I; - assert(LR.valno && "Live range has no valno"); + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); - if (LR.valno->id >= LI.getNumValNums() || - LR.valno != LI.getValNumInfo(LR.valno->id)) { + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { report("Foreign valno in live range", MF); - LR.print(*OS); + I->print(*OS); *OS << " has a valno not in " << LI << '\n'; } - if (LR.valno->isUnused()) { + if (VNI->isUnused()) { report("Live range valno is marked unused", MF); - LR.print(*OS); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF); + I->print(*OS); *OS << " in " << LI << '\n'; + continue; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB); + I->print(*OS); + *OS << " in " << LI << '\n' << "Basic block starts at " + << MBBStartIdx << '\n'; + } + + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF); + I->print(*OS); + *OS << " in " << LI << '\n'; + continue; + } + if (I->end != LiveInts->getMBBEndIdx(EndMBB)) { + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB); + I->print(*OS); + *OS << " in " << LI << '\n' << "Basic block starts at " + << MBBStartIdx << '\n'; + } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) && + !MI->readsVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + // FIXME: Should we check for each of these? + bool hasDeadDef = false; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) { + hasDeadDef = true; + break; + } + } + + if (!hasDeadDef) { + report("Instruction killing live segment neither defines nor reads " + "register", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + } + + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + continue; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } + // Check that VNI is live-out of all predecessors. 
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); + const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + + if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) { + if (PVNI && !PVNI->hasPHIKill()) { + report("Value live out of predecessor doesn't have PHIKill", MF); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << " doesn't have PHIKill, but Valno #" << VNI->id + << " is PHIDef and defined at the beginning of BB#" + << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) + << " in " << LI << '\n'; + } + continue; + } + + if (!PVNI) { + report("Register not marked live out of predecessor", *PI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << PEnd << " in " << LI << '\n'; + continue; + } + + if (PVNI != VNI) { + report("Different value live out of predecessor", *PI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n'; + } + } + if (&*MFI == EndMBB) + break; + ++MFI; } + } + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF); + *OS << NumComp << " components in " << LI << '\n'; + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; + } + } } } } diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index edb4eea71b8a..c05be130ec61 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -33,7 +33,9 @@ namespace { public: static char ID; // Pass identification - OptimizePHIs() : MachineFunctionPass(ID) {} + OptimizePHIs() : MachineFunctionPass(ID) { + initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -55,7 +57,7 @@ namespace { char OptimizePHIs::ID = 0; INITIALIZE_PASS(OptimizePHIs, "opt-phis", - "Optimize machine instruction PHIs", false, false); + "Optimize machine instruction PHIs", false, false) FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h deleted file mode 100644 index b2224cb051dc..000000000000 --- a/lib/CodeGen/PBQP/Graph.h +++ /dev/null @@ -1,425 +0,0 @@ -//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// PBQP Graph class. -// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PBQP_GRAPH_H -#define LLVM_CODEGEN_PBQP_GRAPH_H - -#include "Math.h" - -#include <list> -#include <vector> -#include <map> - -namespace PBQP { - - /// PBQP Graph class. 
- /// Instances of this class describe PBQP problems. - class Graph { - private: - - // ----- TYPEDEFS ----- - class NodeEntry; - class EdgeEntry; - - typedef std::list<NodeEntry> NodeList; - typedef std::list<EdgeEntry> EdgeList; - - public: - - typedef NodeList::iterator NodeItr; - typedef NodeList::const_iterator ConstNodeItr; - - typedef EdgeList::iterator EdgeItr; - typedef EdgeList::const_iterator ConstEdgeItr; - - private: - - typedef std::list<EdgeItr> AdjEdgeList; - - public: - - typedef AdjEdgeList::iterator AdjEdgeItr; - - private: - - class NodeEntry { - private: - Vector costs; - AdjEdgeList adjEdges; - unsigned degree; - void *data; - public: - NodeEntry(const Vector &costs) : costs(costs), degree(0) {} - Vector& getCosts() { return costs; } - const Vector& getCosts() const { return costs; } - unsigned getDegree() const { return degree; } - AdjEdgeItr edgesBegin() { return adjEdges.begin(); } - AdjEdgeItr edgesEnd() { return adjEdges.end(); } - AdjEdgeItr addEdge(EdgeItr e) { - ++degree; - return adjEdges.insert(adjEdges.end(), e); - } - void removeEdge(AdjEdgeItr ae) { - --degree; - adjEdges.erase(ae); - } - void setData(void *data) { this->data = data; } - void* getData() { return data; } - }; - - class EdgeEntry { - private: - NodeItr node1, node2; - Matrix costs; - AdjEdgeItr node1AEItr, node2AEItr; - void *data; - public: - EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs) - : node1(node1), node2(node2), costs(costs) {} - NodeItr getNode1() const { return node1; } - NodeItr getNode2() const { return node2; } - Matrix& getCosts() { return costs; } - const Matrix& getCosts() const { return costs; } - void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; } - AdjEdgeItr getNode1AEItr() { return node1AEItr; } - void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; } - AdjEdgeItr getNode2AEItr() { return node2AEItr; } - void setData(void *data) { this->data = data; } - void *getData() { return data; } - }; - - // ----- MEMBERS ----- - - NodeList nodes; - unsigned numNodes; - - EdgeList edges; - unsigned numEdges; - - // ----- INTERNAL METHODS ----- - - NodeEntry& getNode(NodeItr nItr) { return *nItr; } - const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; } - - EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; } - const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; } - - NodeItr addConstructedNode(const NodeEntry &n) { - ++numNodes; - return nodes.insert(nodes.end(), n); - } - - EdgeItr addConstructedEdge(const EdgeEntry &e) { - assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() && - "Attempt to add duplicate edge."); - ++numEdges; - EdgeItr edgeItr = edges.insert(edges.end(), e); - EdgeEntry &ne = getEdge(edgeItr); - NodeEntry &n1 = getNode(ne.getNode1()); - NodeEntry &n2 = getNode(ne.getNode2()); - // Sanity check on matrix dimensions: - assert((n1.getCosts().getLength() == ne.getCosts().getRows()) && - (n2.getCosts().getLength() == ne.getCosts().getCols()) && - "Edge cost dimensions do not match node costs dimensions."); - ne.setNode1AEItr(n1.addEdge(edgeItr)); - ne.setNode2AEItr(n2.addEdge(edgeItr)); - return edgeItr; - } - - inline void copyFrom(const Graph &other); - public: - - /// \brief Construct an empty PBQP graph. - Graph() : numNodes(0), numEdges(0) {} - - /// \brief Copy construct this graph from "other". Note: Does not copy node - /// and edge data, only graph structure and costs. - /// @param other Source graph to copy from. 
- Graph(const Graph &other) : numNodes(0), numEdges(0) { - copyFrom(other); - } - - /// \brief Make this graph a copy of "other". Note: Does not copy node and - /// edge data, only graph structure and costs. - /// @param other The graph to copy from. - /// @return A reference to this graph. - /// - /// This will clear the current graph, erasing any nodes and edges added, - /// before copying from other. - Graph& operator=(const Graph &other) { - clear(); - copyFrom(other); - return *this; - } - - /// \brief Add a node with the given costs. - /// @param costs Cost vector for the new node. - /// @return Node iterator for the added node. - NodeItr addNode(const Vector &costs) { - return addConstructedNode(NodeEntry(costs)); - } - - /// \brief Add an edge between the given nodes with the given costs. - /// @param n1Itr First node. - /// @param n2Itr Second node. - /// @return Edge iterator for the added edge. - EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr, - const Matrix &costs) { - assert(getNodeCosts(n1Itr).getLength() == costs.getRows() && - getNodeCosts(n2Itr).getLength() == costs.getCols() && - "Matrix dimensions mismatch."); - return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); - } - - /// \brief Get the number of nodes in the graph. - /// @return Number of nodes in the graph. - unsigned getNumNodes() const { return numNodes; } - - /// \brief Get the number of edges in the graph. - /// @return Number of edges in the graph. - unsigned getNumEdges() const { return numEdges; } - - /// \brief Get a node's cost vector. - /// @param nItr Node iterator. - /// @return Node cost vector. - Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); } - - /// \brief Get a node's cost vector (const version). - /// @param nItr Node iterator. - /// @return Node cost vector. - const Vector& getNodeCosts(ConstNodeItr nItr) const { - return getNode(nItr).getCosts(); - } - - /// \brief Set a node's data pointer. - /// @param nItr Node iterator. - /// @param data Pointer to node data. - /// - /// Typically used by a PBQP solver to attach data to aid in solution. - void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); } - - /// \brief Get the node's data pointer. - /// @param nItr Node iterator. - /// @return Pointer to node data. - void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); } - - /// \brief Get an edge's cost matrix. - /// @param eItr Edge iterator. - /// @return Edge cost matrix. - Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); } - - /// \brief Get an edge's cost matrix (const version). - /// @param eItr Edge iterator. - /// @return Edge cost matrix. - const Matrix& getEdgeCosts(ConstEdgeItr eItr) const { - return getEdge(eItr).getCosts(); - } - - /// \brief Set an edge's data pointer. - /// @param eItr Edge iterator. - /// @param data Pointer to edge data. - /// - /// Typically used by a PBQP solver to attach data to aid in solution. - void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); } - - /// \brief Get an edge's data pointer. - /// @param eItr Edge iterator. - /// @return Pointer to edge data. - void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); } - - /// \brief Get a node's degree. - /// @param nItr Node iterator. - /// @return The degree of the node. - unsigned getNodeDegree(NodeItr nItr) const { - return getNode(nItr).getDegree(); - } - - /// \brief Begin iterator for node set. 
- NodeItr nodesBegin() { return nodes.begin(); } - - /// \brief Begin const iterator for node set. - ConstNodeItr nodesBegin() const { return nodes.begin(); } - - /// \brief End iterator for node set. - NodeItr nodesEnd() { return nodes.end(); } - - /// \brief End const iterator for node set. - ConstNodeItr nodesEnd() const { return nodes.end(); } - - /// \brief Begin iterator for edge set. - EdgeItr edgesBegin() { return edges.begin(); } - - /// \brief End iterator for edge set. - EdgeItr edgesEnd() { return edges.end(); } - - /// \brief Get begin iterator for adjacent edge set. - /// @param nItr Node iterator. - /// @return Begin iterator for the set of edges connected to the given node. - AdjEdgeItr adjEdgesBegin(NodeItr nItr) { - return getNode(nItr).edgesBegin(); - } - - /// \brief Get end iterator for adjacent edge set. - /// @param nItr Node iterator. - /// @return End iterator for the set of edges connected to the given node. - AdjEdgeItr adjEdgesEnd(NodeItr nItr) { - return getNode(nItr).edgesEnd(); - } - - /// \brief Get the first node connected to this edge. - /// @param eItr Edge iterator. - /// @return The first node connected to the given edge. - NodeItr getEdgeNode1(EdgeItr eItr) { - return getEdge(eItr).getNode1(); - } - - /// \brief Get the second node connected to this edge. - /// @param eItr Edge iterator. - /// @return The second node connected to the given edge. - NodeItr getEdgeNode2(EdgeItr eItr) { - return getEdge(eItr).getNode2(); - } - - /// \brief Get the "other" node connected to this edge. - /// @param eItr Edge iterator. - /// @param nItr Node iterator for the "given" node. - /// @return The iterator for the "other" node connected to this edge. - NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) { - EdgeEntry &e = getEdge(eItr); - if (e.getNode1() == nItr) { - return e.getNode2(); - } // else - return e.getNode1(); - } - - /// \brief Get the edge connecting two nodes. - /// @param n1Itr First node iterator. - /// @param n2Itr Second node iterator. - /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists, - /// otherwise returns edgesEnd(). - EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) { - for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr); - aeItr != aeEnd; ++aeItr) { - if ((getEdgeNode1(*aeItr) == n2Itr) || - (getEdgeNode2(*aeItr) == n2Itr)) { - return *aeItr; - } - } - return edges.end(); - } - - /// \brief Remove a node from the graph. - /// @param nItr Node iterator. - void removeNode(NodeItr nItr) { - NodeEntry &n = getNode(nItr); - for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) { - EdgeItr eItr = *itr; - ++itr; - removeEdge(eItr); - } - nodes.erase(nItr); - --numNodes; - } - - /// \brief Remove an edge from the graph. - /// @param eItr Edge iterator. - void removeEdge(EdgeItr eItr) { - EdgeEntry &e = getEdge(eItr); - NodeEntry &n1 = getNode(e.getNode1()); - NodeEntry &n2 = getNode(e.getNode2()); - n1.removeEdge(e.getNode1AEItr()); - n2.removeEdge(e.getNode2AEItr()); - edges.erase(eItr); - --numEdges; - } - - /// \brief Remove all nodes and edges from the graph. - void clear() { - nodes.clear(); - edges.clear(); - numNodes = numEdges = 0; - } - - /// \brief Print a representation of this graph in DOT format. - /// @param os Output stream to print on. 
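[Editorial aside] Since this commit only removes the PBQP headers from lib/CodeGen, a quick sketch of the Graph interface shown above may help when reading the rest of the deletion. The cost containers come from the companion Math.h; the constructors taking a size plus an initial value are assumed from that header, and the include path reflects the headers' expected new home under include/llvm/CodeGen/PBQP:

  #include "llvm/CodeGen/PBQP/Graph.h"
  #include <limits>

  void buildTinyProblem() {
    PBQP::Graph g;
    PBQP::Graph::NodeItr n1 = g.addNode(PBQP::Vector(2, 0));  // two options, zero cost
    PBQP::Graph::NodeItr n2 = g.addNode(PBQP::Vector(2, 0));
    PBQP::Matrix e(2, 2, 0);                                  // 2x2 edge cost matrix
    const PBQP::PBQPNum inf = std::numeric_limits<PBQP::PBQPNum>::infinity();
    e[0][0] = e[1][1] = inf;              // forbid both nodes picking the same option
    g.addEdge(n1, n2, e);                 // dimensions must match the node cost vectors
  }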
- template <typename OStream> - void printDot(OStream &os) { - - os << "graph {\n"; - - for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - os << " node" << nodeItr << " [ label=\"" - << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n"; - } - - os << " edge [ len=" << getNumNodes() << " ]\n"; - - for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - os << " node" << getEdgeNode1(edgeItr) - << " -- node" << getEdgeNode2(edgeItr) - << " [ label=\""; - - const Matrix &edgeCosts = getEdgeCosts(edgeItr); - - for (unsigned i = 0; i < edgeCosts.getRows(); ++i) { - os << edgeCosts.getRowAsVector(i) << "\\n"; - } - os << "\" ]\n"; - } - os << "}\n"; - } - - }; - - class NodeItrComparator { - public: - bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const { - return &*n1 < &*n2; - } - - bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const { - return &*n1 < &*n2; - } - }; - - class EdgeItrCompartor { - public: - bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const { - return &*e1 < &*e2; - } - - bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const { - return &*e1 < &*e2; - } - }; - - void Graph::copyFrom(const Graph &other) { - std::map<Graph::ConstNodeItr, Graph::NodeItr, - NodeItrComparator> nodeMap; - - for (Graph::ConstNodeItr nItr = other.nodesBegin(), - nEnd = other.nodesEnd(); - nItr != nEnd; ++nItr) { - nodeMap[nItr] = addNode(other.getNodeCosts(nItr)); - } - - } - -} - -#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h deleted file mode 100644 index 791c227f0d07..000000000000 --- a/lib/CodeGen/PBQP/HeuristicBase.h +++ /dev/null @@ -1,246 +0,0 @@ -//===-- HeuristcBase.h --- Heuristic base class for PBQP --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H -#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H - -#include "HeuristicSolver.h" - -namespace PBQP { - - /// \brief Abstract base class for heuristic implementations. - /// - /// This class provides a handy base for heuristic implementations with common - /// solver behaviour implemented for a number of methods. - /// - /// To implement your own heuristic using this class as a base you'll have to - /// implement, as a minimum, the following methods: - /// <ul> - /// <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the - /// heuristic reduction list. - /// <li> void heuristicReduce() : Perform a single heuristic reduction. - /// <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent) - /// change to the cost matrix on the given edge (by R2). - /// <li> void postUpdateEdgeCostts(Graph::EdgeItr) : Handle the new - /// costs on the given edge. - /// <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new - /// edge into the PBQP graph (by R2). - /// <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the - /// disconnection of the given edge from the given node. - /// <li> A constructor for your derived class : to pass back a reference to - /// the solver which is using this heuristic. - /// </ul> - /// - /// These methods are implemented in this class for documentation purposes, - /// but will assert if called. 
- /// - /// Note that this class uses the curiously recursive template idiom to - /// forward calls to the derived class. These methods need not be made - /// virtual, and indeed probably shouldn't for performance reasons. - /// - /// You'll also need to provide NodeData and EdgeData structs in your class. - /// These can be used to attach data relevant to your heuristic to each - /// node/edge in the PBQP graph. - - template <typename HImpl> - class HeuristicBase { - private: - - typedef std::list<Graph::NodeItr> OptimalList; - - HeuristicSolverImpl<HImpl> &s; - Graph &g; - OptimalList optimalList; - - // Return a reference to the derived heuristic. - HImpl& impl() { return static_cast<HImpl&>(*this); } - - // Add the given node to the optimal reductions list. Keep an iterator to - // its location for fast removal. - void addToOptimalReductionList(Graph::NodeItr nItr) { - optimalList.insert(optimalList.end(), nItr); - } - - public: - - /// \brief Construct an instance with a reference to the given solver. - /// @param solver The solver which is using this heuristic instance. - HeuristicBase(HeuristicSolverImpl<HImpl> &solver) - : s(solver), g(s.getGraph()) { } - - /// \brief Get the solver which is using this heuristic instance. - /// @return The solver which is using this heuristic instance. - /// - /// You can use this method to get access to the solver in your derived - /// heuristic implementation. - HeuristicSolverImpl<HImpl>& getSolver() { return s; } - - /// \brief Get the graph representing the problem to be solved. - /// @return The graph representing the problem to be solved. - Graph& getGraph() { return g; } - - /// \brief Tell the solver to simplify the graph before the reduction phase. - /// @return Whether or not the solver should run a simplification phase - /// prior to the main setup and reduction. - /// - /// HeuristicBase returns true from this method as it's a sensible default, - /// however you can over-ride it in your derived class if you want different - /// behaviour. - bool solverRunSimplify() const { return true; } - - /// \brief Decide whether a node should be optimally or heuristically - /// reduced. - /// @return Whether or not the given node should be listed for optimal - /// reduction (via R0, R1 or R2). - /// - /// HeuristicBase returns true for any node with degree less than 3. This is - /// sane and sensible for many situations, but not all. You can over-ride - /// this method in your derived class if you want a different selection - /// criteria. Note however that your criteria for selecting optimal nodes - /// should be <i>at least</i> as strong as this. I.e. Nodes of degree 3 or - /// higher should not be selected under any circumstances. - bool shouldOptimallyReduce(Graph::NodeItr nItr) { - if (g.getNodeDegree(nItr) < 3) - return true; - // else - return false; - } - - /// \brief Add the given node to the list of nodes to be optimally reduced. - /// @return nItr Node iterator to be added. - /// - /// You probably don't want to over-ride this, except perhaps to record - /// statistics before calling this implementation. HeuristicBase relies on - /// its behaviour. - void addToOptimalReduceList(Graph::NodeItr nItr) { - optimalList.push_back(nItr); - } - - /// \brief Initialise the heuristic. - /// - /// HeuristicBase iterates over all nodes in the problem and adds them to - /// the appropriate list using addToOptimalReduceList or - /// addToHeuristicReduceList based on the result of shouldOptimallyReduce. 
- /// - /// This behaviour should be fine for most situations. - void setup() { - for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { - if (impl().shouldOptimallyReduce(nItr)) { - addToOptimalReduceList(nItr); - } else { - impl().addToHeuristicReduceList(nItr); - } - } - } - - /// \brief Optimally reduce one of the nodes in the optimal reduce list. - /// @return True if a reduction takes place, false if the optimal reduce - /// list is empty. - /// - /// Selects a node from the optimal reduce list and removes it, applying - /// R0, R1 or R2 as appropriate based on the selected node's degree. - bool optimalReduce() { - if (optimalList.empty()) - return false; - - Graph::NodeItr nItr = optimalList.front(); - optimalList.pop_front(); - - switch (s.getSolverDegree(nItr)) { - case 0: s.applyR0(nItr); break; - case 1: s.applyR1(nItr); break; - case 2: s.applyR2(nItr); break; - default: assert(false && - "Optimal reductions of degree > 2 nodes is invalid."); - } - - return true; - } - - /// \brief Perform the PBQP reduction process. - /// - /// Reduces the problem to the empty graph by repeated application of the - /// reduction rules R0, R1, R2 and RN. - /// R0, R1 or R2 are always applied if possible before RN is used. - void reduce() { - bool finished = false; - - while (!finished) { - if (!optimalReduce()) { - if (impl().heuristicReduce()) { - getSolver().recordRN(); - } else { - finished = true; - } - } - } - } - - /// \brief Add a node to the heuristic reduce list. - /// @param nItr Node iterator to add to the heuristic reduce list. - void addToHeuristicList(Graph::NodeItr nItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Heuristically reduce one of the nodes in the heuristic - /// reduce list. - /// @return True if a reduction takes place, false if the heuristic reduce - /// list is empty. - void heuristicReduce() { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Prepare a change in the costs on the given edge. - /// @param eItr Edge iterator. - void preUpdateEdgeCosts(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Handle the change in the costs on the given edge. - /// @param eItr Edge iterator. - void postUpdateEdgeCostts(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Handle the addition of a new edge into the PBQP graph. - /// @param eItr Edge iterator for the added edge. - void handleAddEdge(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Handle disconnection of an edge from a node. - /// @param eItr Edge iterator for edge being disconnected. - /// @param nItr Node iterator for the node being disconnected from. - /// - /// Edges are frequently removed due to the removal of a node. This - /// method allows for the effect to be computed only for the remaining - /// node in the graph. - void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Clean up any structures used by HeuristicBase. - /// - /// At present this just performs a sanity check: that the optimal reduce - /// list is empty now that reduction has completed. - /// - /// If your derived class has more complex structures which need tearing - /// down you should over-ride this method but include a call back to this - /// implementation. 
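[Editorial aside] Pulling the CRTP contract together: a derived heuristic supplies NodeData/EdgeData types plus the hooks the solver calls back into. Note that the names actually invoked are addToHeuristicReduceList() (see setup() above) and postUpdateEdgeCosts() (see applyR2() in HeuristicSolver.h below), which differ slightly from the spellings in the class comment, and heuristicReduce() must return bool because reduce() branches on its result. A skeletal, illustrative derived class, not an in-tree one:

  struct MyHeuristic : public PBQP::HeuristicBase<MyHeuristic> {
    struct NodeData {};                                  // per-node scratch state
    struct EdgeData {};                                  // per-edge scratch state

    MyHeuristic(PBQP::HeuristicSolverImpl<MyHeuristic> &S)
      : PBQP::HeuristicBase<MyHeuristic>(S) {}

    void addToHeuristicReduceList(PBQP::Graph::NodeItr) {}   // record high-degree nodes
    bool heuristicReduce() { return false; }                 // nothing left to reduce
    void preUpdateEdgeCosts(PBQP::Graph::EdgeItr) {}
    void postUpdateEdgeCosts(PBQP::Graph::EdgeItr) {}
    void handleAddEdge(PBQP::Graph::EdgeItr) {}
    void handleRemoveEdge(PBQP::Graph::EdgeItr, PBQP::Graph::NodeItr) {}
  };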
- void cleanup() { - assert(optimalList.empty() && "Nodes left over in optimal reduce list?"); - } - - }; - -} - - -#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h deleted file mode 100644 index 35514f967478..000000000000 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ /dev/null @@ -1,616 +0,0 @@ -//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Heuristic PBQP solver. This solver is able to perform optimal reductions for -// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is -// used to select a node for reduction. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H -#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H - -#include "Graph.h" -#include "Solution.h" -#include <vector> -#include <limits> - -namespace PBQP { - - /// \brief Heuristic PBQP solver implementation. - /// - /// This class should usually be created (and destroyed) indirectly via a call - /// to HeuristicSolver<HImpl>::solve(Graph&). - /// See the comments for HeuristicSolver. - /// - /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules, - /// backpropagation phase, and maintains the internal copy of the graph on - /// which the reduction is carried out (the original being kept to facilitate - /// backpropagation). - template <typename HImpl> - class HeuristicSolverImpl { - private: - - typedef typename HImpl::NodeData HeuristicNodeData; - typedef typename HImpl::EdgeData HeuristicEdgeData; - - typedef std::list<Graph::EdgeItr> SolverEdges; - - public: - - /// \brief Iterator type for edges in the solver graph. 
- typedef SolverEdges::iterator SolverEdgeItr; - - private: - - class NodeData { - public: - NodeData() : solverDegree(0) {} - - HeuristicNodeData& getHeuristicData() { return hData; } - - SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) { - ++solverDegree; - return solverEdges.insert(solverEdges.end(), eItr); - } - - void removeSolverEdge(SolverEdgeItr seItr) { - --solverDegree; - solverEdges.erase(seItr); - } - - SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); } - SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); } - unsigned getSolverDegree() const { return solverDegree; } - void clearSolverEdges() { - solverDegree = 0; - solverEdges.clear(); - } - - private: - HeuristicNodeData hData; - unsigned solverDegree; - SolverEdges solverEdges; - }; - - class EdgeData { - public: - HeuristicEdgeData& getHeuristicData() { return hData; } - - void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) { - this->n1SolverEdgeItr = n1SolverEdgeItr; - } - - SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; } - - void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){ - this->n2SolverEdgeItr = n2SolverEdgeItr; - } - - SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; } - - private: - - HeuristicEdgeData hData; - SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr; - }; - - Graph &g; - HImpl h; - Solution s; - std::vector<Graph::NodeItr> stack; - - typedef std::list<NodeData> NodeDataList; - NodeDataList nodeDataList; - - typedef std::list<EdgeData> EdgeDataList; - EdgeDataList edgeDataList; - - public: - - /// \brief Construct a heuristic solver implementation to solve the given - /// graph. - /// @param g The graph representing the problem instance to be solved. - HeuristicSolverImpl(Graph &g) : g(g), h(*this) {} - - /// \brief Get the graph being solved by this solver. - /// @return The graph representing the problem instance being solved by this - /// solver. - Graph& getGraph() { return g; } - - /// \brief Get the heuristic data attached to the given node. - /// @param nItr Node iterator. - /// @return The heuristic data attached to the given node. - HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).getHeuristicData(); - } - - /// \brief Get the heuristic data attached to the given edge. - /// @param eItr Edge iterator. - /// @return The heuristic data attached to the given node. - HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { - return getSolverEdgeData(eItr).getHeuristicData(); - } - - /// \brief Begin iterator for the set of edges adjacent to the given node in - /// the solver graph. - /// @param nItr Node iterator. - /// @return Begin iterator for the set of edges adjacent to the given node - /// in the solver graph. - SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).solverEdgesBegin(); - } - - /// \brief End iterator for the set of edges adjacent to the given node in - /// the solver graph. - /// @param nItr Node iterator. - /// @return End iterator for the set of edges adjacent to the given node in - /// the solver graph. - SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).solverEdgesEnd(); - } - - /// \brief Remove a node from the solver graph. - /// @param eItr Edge iterator for edge to be removed. - /// - /// Does <i>not</i> notify the heuristic of the removal. That should be - /// done manually if necessary. 
- void removeSolverEdge(Graph::EdgeItr eItr) { - EdgeData &eData = getSolverEdgeData(eItr); - NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), - &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); - - n1Data.removeSolverEdge(eData.getN1SolverEdgeItr()); - n2Data.removeSolverEdge(eData.getN2SolverEdgeItr()); - } - - /// \brief Compute a solution to the PBQP problem instance with which this - /// heuristic solver was constructed. - /// @return A solution to the PBQP problem. - /// - /// Performs the full PBQP heuristic solver algorithm, including setup, - /// calls to the heuristic (which will call back to the reduction rules in - /// this class), and cleanup. - Solution computeSolution() { - setup(); - h.setup(); - h.reduce(); - backpropagate(); - h.cleanup(); - cleanup(); - return s; - } - - /// \brief Add to the end of the stack. - /// @param nItr Node iterator to add to the reduction stack. - void pushToStack(Graph::NodeItr nItr) { - getSolverNodeData(nItr).clearSolverEdges(); - stack.push_back(nItr); - } - - /// \brief Returns the solver degree of the given node. - /// @param nItr Node iterator for which degree is requested. - /// @return Node degree in the <i>solver</i> graph (not the original graph). - unsigned getSolverDegree(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).getSolverDegree(); - } - - /// \brief Set the solution of the given node. - /// @param nItr Node iterator to set solution for. - /// @param selection Selection for node. - void setSolution(const Graph::NodeItr &nItr, unsigned selection) { - s.setSelection(nItr, selection); - - for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), - aeEnd = g.adjEdgesEnd(nItr); - aeItr != aeEnd; ++aeItr) { - Graph::EdgeItr eItr(*aeItr); - Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr)); - getSolverNodeData(anItr).addSolverEdge(eItr); - } - } - - /// \brief Apply rule R0. - /// @param nItr Node iterator for node to apply R0 to. - /// - /// Node will be automatically pushed to the solver stack. - void applyR0(Graph::NodeItr nItr) { - assert(getSolverNodeData(nItr).getSolverDegree() == 0 && - "R0 applied to node with degree != 0."); - - // Nothing to do. Just push the node onto the reduction stack. - pushToStack(nItr); - - s.recordR0(); - } - - /// \brief Apply rule R1. - /// @param xnItr Node iterator for node to apply R1 to. - /// - /// Node will be automatically pushed to the solver stack. - void applyR1(Graph::NodeItr xnItr) { - NodeData &nd = getSolverNodeData(xnItr); - assert(nd.getSolverDegree() == 1 && - "R1 applied to node with degree != 1."); - - Graph::EdgeItr eItr = *nd.solverEdgesBegin(); - - const Matrix &eCosts = g.getEdgeCosts(eItr); - const Vector &xCosts = g.getNodeCosts(xnItr); - - // Duplicate a little to avoid transposing matrices. 
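[Editorial aside] Both branches that follow implement the same R1 rule with the edge matrix oriented one way or the other. With E's rows indexing the degree-1 node x and its columns indexing the neighbour y, the update is y[j] += min over i of (E[i][j] + x[i]); x and the edge are then removed, and x's own choice is recovered in backpropagate() once y's selection is known. Worked example: x = (0, 2) and E = [[0, inf], [inf, 0]] give y[0] += min(0+0, inf+2) = 0 and y[1] += min(inf+0, 0+2) = 2.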
- if (xnItr == g.getEdgeNode1(eItr)) { - Graph::NodeItr ynItr = g.getEdgeNode2(eItr); - Vector &yCosts = g.getNodeCosts(ynItr); - for (unsigned j = 0; j < yCosts.getLength(); ++j) { - PBQPNum min = eCosts[0][j] + xCosts[0]; - for (unsigned i = 1; i < xCosts.getLength(); ++i) { - PBQPNum c = eCosts[i][j] + xCosts[i]; - if (c < min) - min = c; - } - yCosts[j] += min; - } - h.handleRemoveEdge(eItr, ynItr); - } else { - Graph::NodeItr ynItr = g.getEdgeNode1(eItr); - Vector &yCosts = g.getNodeCosts(ynItr); - for (unsigned i = 0; i < yCosts.getLength(); ++i) { - PBQPNum min = eCosts[i][0] + xCosts[0]; - for (unsigned j = 1; j < xCosts.getLength(); ++j) { - PBQPNum c = eCosts[i][j] + xCosts[j]; - if (c < min) - min = c; - } - yCosts[i] += min; - } - h.handleRemoveEdge(eItr, ynItr); - } - removeSolverEdge(eItr); - assert(nd.getSolverDegree() == 0 && - "Degree 1 with edge removed should be 0."); - pushToStack(xnItr); - s.recordR1(); - } - - /// \brief Apply rule R2. - /// @param xnItr Node iterator for node to apply R2 to. - /// - /// Node will be automatically pushed to the solver stack. - void applyR2(Graph::NodeItr xnItr) { - assert(getSolverNodeData(xnItr).getSolverDegree() == 2 && - "R2 applied to node with degree != 2."); - - NodeData &nd = getSolverNodeData(xnItr); - const Vector &xCosts = g.getNodeCosts(xnItr); - - SolverEdgeItr aeItr = nd.solverEdgesBegin(); - Graph::EdgeItr yxeItr = *aeItr, - zxeItr = *(++aeItr); - - Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr), - znItr = g.getEdgeOtherNode(zxeItr, xnItr); - - bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr), - flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr); - - const Matrix *yxeCosts = flipEdge1 ? - new Matrix(g.getEdgeCosts(yxeItr).transpose()) : - &g.getEdgeCosts(yxeItr); - - const Matrix *zxeCosts = flipEdge2 ? - new Matrix(g.getEdgeCosts(zxeItr).transpose()) : - &g.getEdgeCosts(zxeItr); - - unsigned xLen = xCosts.getLength(), - yLen = yxeCosts->getRows(), - zLen = zxeCosts->getRows(); - - Matrix delta(yLen, zLen); - - for (unsigned i = 0; i < yLen; ++i) { - for (unsigned j = 0; j < zLen; ++j) { - PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0]; - for (unsigned k = 1; k < xLen; ++k) { - PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k]; - if (c < min) { - min = c; - } - } - delta[i][j] = min; - } - } - - if (flipEdge1) - delete yxeCosts; - - if (flipEdge2) - delete zxeCosts; - - Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr); - bool addedEdge = false; - - if (yzeItr == g.edgesEnd()) { - yzeItr = g.addEdge(ynItr, znItr, delta); - addedEdge = true; - } else { - Matrix &yzeCosts = g.getEdgeCosts(yzeItr); - h.preUpdateEdgeCosts(yzeItr); - if (ynItr == g.getEdgeNode1(yzeItr)) { - yzeCosts += delta; - } else { - yzeCosts += delta.transpose(); - } - } - - bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr); - - if (!addedEdge) { - // If we modified the edge costs let the heuristic know. - h.postUpdateEdgeCosts(yzeItr); - } - - if (nullCostEdge) { - // If this edge ended up null remove it. - if (!addedEdge) { - // We didn't just add it, so we need to notify the heuristic - // and remove it from the solver. - h.handleRemoveEdge(yzeItr, ynItr); - h.handleRemoveEdge(yzeItr, znItr); - removeSolverEdge(yzeItr); - } - g.removeEdge(yzeItr); - } else if (addedEdge) { - // If the edge was added, and non-null, finish setting it up, add it to - // the solver & notify heuristic. 
- edgeDataList.push_back(EdgeData()); - g.setEdgeData(yzeItr, &edgeDataList.back()); - addSolverEdge(yzeItr); - h.handleAddEdge(yzeItr); - } - - h.handleRemoveEdge(yxeItr, ynItr); - removeSolverEdge(yxeItr); - h.handleRemoveEdge(zxeItr, znItr); - removeSolverEdge(zxeItr); - - pushToStack(xnItr); - s.recordR2(); - } - - /// \brief Record an application of the RN rule. - /// - /// For use by the HeuristicBase. - void recordRN() { s.recordRN(); } - - private: - - NodeData& getSolverNodeData(Graph::NodeItr nItr) { - return *static_cast<NodeData*>(g.getNodeData(nItr)); - } - - EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) { - return *static_cast<EdgeData*>(g.getEdgeData(eItr)); - } - - void addSolverEdge(Graph::EdgeItr eItr) { - EdgeData &eData = getSolverEdgeData(eItr); - NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), - &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); - - eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr)); - eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr)); - } - - void setup() { - if (h.solverRunSimplify()) { - simplify(); - } - - // Create node data objects. - for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { - nodeDataList.push_back(NodeData()); - g.setNodeData(nItr, &nodeDataList.back()); - } - - // Create edge data objects. - for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); - eItr != eEnd; ++eItr) { - edgeDataList.push_back(EdgeData()); - g.setEdgeData(eItr, &edgeDataList.back()); - addSolverEdge(eItr); - } - } - - void simplify() { - disconnectTrivialNodes(); - eliminateIndependentEdges(); - } - - // Eliminate trivial nodes. - void disconnectTrivialNodes() { - unsigned numDisconnected = 0; - - for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { - - if (g.getNodeCosts(nItr).getLength() == 1) { - - std::vector<Graph::EdgeItr> edgesToRemove; - - for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), - aeEnd = g.adjEdgesEnd(nItr); - aeItr != aeEnd; ++aeItr) { - - Graph::EdgeItr eItr = *aeItr; - - if (g.getEdgeNode1(eItr) == nItr) { - Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(eItr).getRowAsVector(0); - } - else { - Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(eItr).getColAsVector(0); - } - - edgesToRemove.push_back(eItr); - } - - if (!edgesToRemove.empty()) - ++numDisconnected; - - while (!edgesToRemove.empty()) { - g.removeEdge(edgesToRemove.back()); - edgesToRemove.pop_back(); - } - } - } - } - - void eliminateIndependentEdges() { - std::vector<Graph::EdgeItr> edgesToProcess; - unsigned numEliminated = 0; - - for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); - eItr != eEnd; ++eItr) { - edgesToProcess.push_back(eItr); - } - - while (!edgesToProcess.empty()) { - if (tryToEliminateEdge(edgesToProcess.back())) - ++numEliminated; - edgesToProcess.pop_back(); - } - } - - bool tryToEliminateEdge(Graph::EdgeItr eItr) { - if (tryNormaliseEdgeMatrix(eItr)) { - g.removeEdge(eItr); - return true; - } - return false; - } - - bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { - - const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); - - Matrix &edgeCosts = g.getEdgeCosts(eItr); - Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), - &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); - - for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - PBQPNum rowMin = infinity; - - for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - if 
(vCosts[c] != infinity && edgeCosts[r][c] < rowMin) - rowMin = edgeCosts[r][c]; - } - - uCosts[r] += rowMin; - - if (rowMin != infinity) { - edgeCosts.subFromRow(r, rowMin); - } - else { - edgeCosts.setRow(r, 0); - } - } - - for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - PBQPNum colMin = infinity; - - for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) - colMin = edgeCosts[r][c]; - } - - vCosts[c] += colMin; - - if (colMin != infinity) { - edgeCosts.subFromCol(c, colMin); - } - else { - edgeCosts.setCol(c, 0); - } - } - - return edgeCosts.isZero(); - } - - void backpropagate() { - while (!stack.empty()) { - computeSolution(stack.back()); - stack.pop_back(); - } - } - - void computeSolution(Graph::NodeItr nItr) { - - NodeData &nodeData = getSolverNodeData(nItr); - - Vector v(g.getNodeCosts(nItr)); - - // Solve based on existing solved edges. - for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(), - solvedEdgeEnd = nodeData.solverEdgesEnd(); - solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) { - - Graph::EdgeItr eItr(*solvedEdgeItr); - Matrix &edgeCosts = g.getEdgeCosts(eItr); - - if (nItr == g.getEdgeNode1(eItr)) { - Graph::NodeItr adjNode(g.getEdgeNode2(eItr)); - unsigned adjSolution = s.getSelection(adjNode); - v += edgeCosts.getColAsVector(adjSolution); - } - else { - Graph::NodeItr adjNode(g.getEdgeNode1(eItr)); - unsigned adjSolution = s.getSelection(adjNode); - v += edgeCosts.getRowAsVector(adjSolution); - } - - } - - setSolution(nItr, v.minIndex()); - } - - void cleanup() { - h.cleanup(); - nodeDataList.clear(); - edgeDataList.clear(); - } - }; - - /// \brief PBQP heuristic solver class. - /// - /// Given a PBQP Graph g representing a PBQP problem, you can find a solution - /// by calling - /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt> - /// - /// The choice of heuristic for the H parameter will affect both the solver - /// speed and solution quality. The heuristic should be chosen based on the - /// nature of the problem being solved. - /// Currently the only solver included with LLVM is the Briggs heuristic for - /// register allocation. - template <typename HImpl> - class HeuristicSolver { - public: - static Solution solve(Graph &g) { - HeuristicSolverImpl<HImpl> hs(g); - return hs.computeSolution(); - } - }; - -} - -#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h deleted file mode 100644 index 18eaf7c0da9b..000000000000 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ /dev/null @@ -1,460 +0,0 @@ -//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class implements the Briggs test for "allocability" of nodes in a -// PBQP graph representing a register allocation problem. Nodes which can be -// proven allocable (by a safe and relatively accurate test) are removed from -// the PBQP graph first. If no provably allocable node is present in the graph -// then the node with the minimal spill-cost to degree ratio is removed. 
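[Editorial aside] For completeness, the removed solver was driven exactly as the HeuristicSolver comment above describes, with Briggs (whose header deletion starts just below) as the only in-tree heuristic; a sketch, assuming the headers' post-move include paths under include/llvm/CodeGen/PBQP:

  #include "llvm/CodeGen/PBQP/HeuristicSolver.h"
  #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"

  PBQP::Solution solveWithBriggs(PBQP::Graph &g) {
    // Runs setup, the R0/R1/R2 + heuristic reduction loop, and backpropagation.
    PBQP::Solution s = PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(g);
    return s;                     // s.getSelection(nItr) yields each node's chosen option
  }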
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H -#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H - -#include "../HeuristicSolver.h" -#include "../HeuristicBase.h" - -#include <set> -#include <limits> - -namespace PBQP { - namespace Heuristics { - - /// \brief PBQP Heuristic which applies an allocability test based on - /// Briggs. - /// - /// This heuristic assumes that the elements of cost vectors in the PBQP - /// problem represent storage options, with the first being the spill - /// option and subsequent elements representing legal registers for the - /// corresponding node. Edge cost matrices are likewise assumed to represent - /// register constraints. - /// If one or more nodes can be proven allocable by this heuristic (by - /// inspection of their constraint matrices) then the allocable node of - /// highest degree is selected for the next reduction and pushed to the - /// solver stack. If no nodes can be proven allocable then the node with - /// the lowest estimated spill cost is selected and push to the solver stack - /// instead. - /// - /// This implementation is built on top of HeuristicBase. - class Briggs : public HeuristicBase<Briggs> { - private: - - class LinkDegreeComparator { - public: - LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {} - bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { - if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) - return true; - return false; - } - private: - HeuristicSolverImpl<Briggs> *s; - }; - - class SpillCostComparator { - public: - SpillCostComparator(HeuristicSolverImpl<Briggs> &s) - : s(&s), g(&s.getGraph()) {} - bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { - PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr), - cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); - if (cost1 < cost2) - return true; - return false; - } - - private: - HeuristicSolverImpl<Briggs> *s; - Graph *g; - }; - - typedef std::list<Graph::NodeItr> RNAllocableList; - typedef RNAllocableList::iterator RNAllocableListItr; - - typedef std::list<Graph::NodeItr> RNUnallocableList; - typedef RNUnallocableList::iterator RNUnallocableListItr; - - public: - - struct NodeData { - typedef std::vector<unsigned> UnsafeDegreesArray; - bool isHeuristic, isAllocable, isInitialized; - unsigned numDenied, numSafe; - UnsafeDegreesArray unsafeDegrees; - RNAllocableListItr rnaItr; - RNUnallocableListItr rnuItr; - - NodeData() - : isHeuristic(false), isAllocable(false), isInitialized(false), - numDenied(0), numSafe(0) { } - }; - - struct EdgeData { - typedef std::vector<unsigned> UnsafeArray; - unsigned worst, reverseWorst; - UnsafeArray unsafe, reverseUnsafe; - bool isUpToDate; - - EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {} - }; - - /// \brief Construct an instance of the Briggs heuristic. - /// @param solver A reference to the solver which is using this heuristic. - Briggs(HeuristicSolverImpl<Briggs> &solver) : - HeuristicBase<Briggs>(solver) {} - - /// \brief Determine whether a node should be reduced using optimal - /// reduction. - /// @param nItr Node iterator to be considered. - /// @return True if the given node should be optimally reduced, false - /// otherwise. - /// - /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one - /// exception. Nodes whose spill cost (element 0 of their cost vector) is - /// infinite are checked for allocability first. 
Allocable nodes may be - /// optimally reduced, but nodes whose allocability cannot be proven are - /// selected for heuristic reduction instead. - bool shouldOptimallyReduce(Graph::NodeItr nItr) { - if (getSolver().getSolverDegree(nItr) < 3) { - return true; - } - // else - return false; - } - - /// \brief Add a node to the heuristic reduce list. - /// @param nItr Node iterator to add to the heuristic reduce list. - void addToHeuristicReduceList(Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - initializeNode(nItr); - nd.isHeuristic = true; - if (nd.isAllocable) { - nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); - } else { - nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr); - } - } - - /// \brief Heuristically reduce one of the nodes in the heuristic - /// reduce list. - /// @return True if a reduction takes place, false if the heuristic reduce - /// list is empty. - /// - /// If the list of allocable nodes is non-empty a node is selected - /// from it and pushed to the stack. Otherwise if the non-allocable list - /// is non-empty a node is selected from it and pushed to the stack. - /// If both lists are empty the method simply returns false with no action - /// taken. - bool heuristicReduce() { - if (!rnAllocableList.empty()) { - RNAllocableListItr rnaItr = - min_element(rnAllocableList.begin(), rnAllocableList.end(), - LinkDegreeComparator(getSolver())); - Graph::NodeItr nItr = *rnaItr; - rnAllocableList.erase(rnaItr); - handleRemoveNode(nItr); - getSolver().pushToStack(nItr); - return true; - } else if (!rnUnallocableList.empty()) { - RNUnallocableListItr rnuItr = - min_element(rnUnallocableList.begin(), rnUnallocableList.end(), - SpillCostComparator(getSolver())); - Graph::NodeItr nItr = *rnuItr; - rnUnallocableList.erase(rnuItr); - handleRemoveNode(nItr); - getSolver().pushToStack(nItr); - return true; - } - // else - return false; - } - - /// \brief Prepare a change in the costs on the given edge. - /// @param eItr Edge iterator. - void preUpdateEdgeCosts(Graph::EdgeItr eItr) { - Graph &g = getGraph(); - Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), - n2Itr = g.getEdgeNode2(eItr); - NodeData &n1 = getHeuristicNodeData(n1Itr), - &n2 = getHeuristicNodeData(n2Itr); - - if (n1.isHeuristic) - subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr)); - if (n2.isHeuristic) - subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr)); - - EdgeData &ed = getHeuristicEdgeData(eItr); - ed.isUpToDate = false; - } - - /// \brief Handle the change in the costs on the given edge. - /// @param eItr Edge iterator. - void postUpdateEdgeCosts(Graph::EdgeItr eItr) { - // This is effectively the same as adding a new edge now, since - // we've factored out the costs of the old one. - handleAddEdge(eItr); - } - - /// \brief Handle the addition of a new edge into the PBQP graph. - /// @param eItr Edge iterator for the added edge. - /// - /// Updates allocability of any nodes connected by this edge which are - /// being managed by the heuristic. If allocability changes they are - /// moved to the appropriate list. - void handleAddEdge(Graph::EdgeItr eItr) { - Graph &g = getGraph(); - Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), - n2Itr = g.getEdgeNode2(eItr); - NodeData &n1 = getHeuristicNodeData(n1Itr), - &n2 = getHeuristicNodeData(n2Itr); - - // If neither node is managed by the heuristic there's nothing to be - // done. - if (!n1.isHeuristic && !n2.isHeuristic) - return; - - // Ok - we need to update at least one node. 
- computeEdgeContributions(eItr); - - // Update node 1 if it's managed by the heuristic. - if (n1.isHeuristic) { - bool n1WasAllocable = n1.isAllocable; - addEdgeContributions(eItr, n1Itr); - updateAllocability(n1Itr); - if (n1WasAllocable && !n1.isAllocable) { - rnAllocableList.erase(n1.rnaItr); - n1.rnuItr = - rnUnallocableList.insert(rnUnallocableList.end(), n1Itr); - } - } - - // Likewise for node 2. - if (n2.isHeuristic) { - bool n2WasAllocable = n2.isAllocable; - addEdgeContributions(eItr, n2Itr); - updateAllocability(n2Itr); - if (n2WasAllocable && !n2.isAllocable) { - rnAllocableList.erase(n2.rnaItr); - n2.rnuItr = - rnUnallocableList.insert(rnUnallocableList.end(), n2Itr); - } - } - } - - /// \brief Handle disconnection of an edge from a node. - /// @param eItr Edge iterator for edge being disconnected. - /// @param nItr Node iterator for the node being disconnected from. - /// - /// Updates allocability of the given node and, if appropriate, moves the - /// node to a new list. - void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - - // If the node is not managed by the heuristic there's nothing to be - // done. - if (!nd.isHeuristic) - return; - - EdgeData &ed = getHeuristicEdgeData(eItr); - (void)ed; - assert(ed.isUpToDate && "Edge data is not up to date."); - - // Update node. - bool ndWasAllocable = nd.isAllocable; - subtractEdgeContributions(eItr, nItr); - updateAllocability(nItr); - - // If the node has gone optimal... - if (shouldOptimallyReduce(nItr)) { - nd.isHeuristic = false; - addToOptimalReduceList(nItr); - if (ndWasAllocable) { - rnAllocableList.erase(nd.rnaItr); - } else { - rnUnallocableList.erase(nd.rnuItr); - } - } else { - // Node didn't go optimal, but we might have to move it - // from "unallocable" to "allocable". - if (!ndWasAllocable && nd.isAllocable) { - rnUnallocableList.erase(nd.rnuItr); - nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); - } - } - } - - private: - - NodeData& getHeuristicNodeData(Graph::NodeItr nItr) { - return getSolver().getHeuristicNodeData(nItr); - } - - EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { - return getSolver().getHeuristicEdgeData(eItr); - } - - // Work out what this edge will contribute to the allocability of the - // nodes connected to it. - void computeEdgeContributions(Graph::EdgeItr eItr) { - EdgeData &ed = getHeuristicEdgeData(eItr); - - if (ed.isUpToDate) - return; // Edge data is already up to date. - - Matrix &eCosts = getGraph().getEdgeCosts(eItr); - - unsigned numRegs = eCosts.getRows() - 1, - numReverseRegs = eCosts.getCols() - 1; - - std::vector<unsigned> rowInfCounts(numRegs, 0), - colInfCounts(numReverseRegs, 0); - - ed.worst = 0; - ed.reverseWorst = 0; - ed.unsafe.clear(); - ed.unsafe.resize(numRegs, 0); - ed.reverseUnsafe.clear(); - ed.reverseUnsafe.resize(numReverseRegs, 0); - - for (unsigned i = 0; i < numRegs; ++i) { - for (unsigned j = 0; j < numReverseRegs; ++j) { - if (eCosts[i + 1][j + 1] == - std::numeric_limits<PBQPNum>::infinity()) { - ed.unsafe[i] = 1; - ed.reverseUnsafe[j] = 1; - ++rowInfCounts[i]; - ++colInfCounts[j]; - - if (colInfCounts[j] > ed.worst) { - ed.worst = colInfCounts[j]; - } - - if (rowInfCounts[i] > ed.reverseWorst) { - ed.reverseWorst = rowInfCounts[i]; - } - } - } - } - - ed.isUpToDate = true; - } - - // Add the contributions of the given edge to the given node's - // numDenied and safe members. No action is taken other than to update - // these member values. 
Once updated these numbers can be used by clients - // to update the node's allocability. - void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - EdgeData &ed = getHeuristicEdgeData(eItr); - - assert(ed.isUpToDate && "Using out-of-date edge numbers."); - - NodeData &nd = getHeuristicNodeData(nItr); - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - - bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); - EdgeData::UnsafeArray &unsafe = - nIsNode1 ? ed.unsafe : ed.reverseUnsafe; - nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst; - - for (unsigned r = 0; r < numRegs; ++r) { - if (unsafe[r]) { - if (nd.unsafeDegrees[r]==0) { - --nd.numSafe; - } - ++nd.unsafeDegrees[r]; - } - } - } - - // Subtract the contributions of the given edge to the given node's - // numDenied and safe members. No action is taken other than to update - // these member values. Once updated these numbers can be used by clients - // to update the node's allocability. - void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - EdgeData &ed = getHeuristicEdgeData(eItr); - - assert(ed.isUpToDate && "Using out-of-date edge numbers."); - - NodeData &nd = getHeuristicNodeData(nItr); - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - - bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); - EdgeData::UnsafeArray &unsafe = - nIsNode1 ? ed.unsafe : ed.reverseUnsafe; - nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst; - - for (unsigned r = 0; r < numRegs; ++r) { - if (unsafe[r]) { - if (nd.unsafeDegrees[r] == 1) { - ++nd.numSafe; - } - --nd.unsafeDegrees[r]; - } - } - } - - void updateAllocability(Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0; - } - - void initializeNode(Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - - if (nd.isInitialized) - return; // Node data is already up to date. 
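The add/subtract edge-contribution routines above maintain two counters per node: numDenied, the sum over incident edges of the worst-case number of registers an adjacent node can deny, and numSafe, the number of registers that no incident edge marks unsafe. updateAllocability then declares the node allocable when numDenied < numRegs or at least one safe register remains. A small self-contained sketch of that bookkeeping, using plain structs rather than the heuristic's node/edge data (NodeInfo, EdgeInfo and addEdge are illustrative names, not LLVM types):

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative per-edge data: 'worst' is the largest number of registers an
// adjacent node can deny this node through the edge; 'unsafe[r]' is non-zero
// if register r can be made unusable by some choice at the adjacent node.
struct EdgeInfo {
  unsigned worst = 0;
  std::vector<unsigned> unsafe; // one flag per register
};

struct NodeInfo {
  unsigned numDenied = 0;              // sum of incident edges' worst values
  unsigned numSafe = 0;                // registers no incident edge marks unsafe
  std::vector<unsigned> unsafeDegrees; // how many edges mark each register unsafe
  bool isAllocable = false;
};

static void addEdge(NodeInfo &n, const EdgeInfo &e) {
  n.numDenied += e.worst;
  for (std::size_t r = 0; r < e.unsafe.size(); ++r)
    if (e.unsafe[r]) {
      if (n.unsafeDegrees[r] == 0)
        --n.numSafe;
      ++n.unsafeDegrees[r];
    }
}

static void updateAllocability(NodeInfo &n, unsigned numRegs) {
  n.isAllocable = n.numDenied < numRegs || n.numSafe > 0;
}

int main() {
  const unsigned numRegs = 2;
  NodeInfo n;
  n.numSafe = numRegs;
  n.unsafeDegrees.assign(numRegs, 0);

  // One neighbour that can deny at most one register and only threatens r0:
  // the node stays allocable because numDenied (1) < numRegs (2).
  addEdge(n, EdgeInfo{1, {1, 0}});
  updateAllocability(n, numRegs);
  std::cout << "allocable after one edge: " << n.isAllocable << "\n"; // 1

  // A second such neighbour pushes numDenied to 2, but r1 is still safe, so
  // the Briggs test still proves the node allocable.
  addEdge(n, EdgeInfo{1, {1, 0}});
  updateAllocability(n, numRegs);
  std::cout << "allocable after two edges: " << n.isAllocable << "\n"; // 1
}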
- - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - - nd.numDenied = 0; - nd.numSafe = numRegs; - nd.unsafeDegrees.resize(numRegs, 0); - - typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; - - for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr), - aeEnd = getSolver().solverEdgesEnd(nItr); - aeItr != aeEnd; ++aeItr) { - - Graph::EdgeItr eItr = *aeItr; - computeEdgeContributions(eItr); - addEdgeContributions(eItr, nItr); - } - - updateAllocability(nItr); - nd.isInitialized = true; - } - - void handleRemoveNode(Graph::NodeItr xnItr) { - typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; - std::vector<Graph::EdgeItr> edgesToRemove; - for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr), - aeEnd = getSolver().solverEdgesEnd(xnItr); - aeItr != aeEnd; ++aeItr) { - Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr); - handleRemoveEdge(*aeItr, ynItr); - edgesToRemove.push_back(*aeItr); - } - while (!edgesToRemove.empty()) { - getSolver().removeSolverEdge(edgesToRemove.back()); - edgesToRemove.pop_back(); - } - } - - RNAllocableList rnAllocableList; - RNUnallocableList rnUnallocableList; - }; - - } -} - - -#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H diff --git a/lib/CodeGen/PBQP/Math.h b/lib/CodeGen/PBQP/Math.h deleted file mode 100644 index e7598bf3e3f1..000000000000 --- a/lib/CodeGen/PBQP/Math.h +++ /dev/null @@ -1,288 +0,0 @@ -//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_MATH_H -#define LLVM_CODEGEN_PBQP_MATH_H - -#include <cassert> -#include <algorithm> -#include <functional> - -namespace PBQP { - -typedef float PBQPNum; - -/// \brief PBQP Vector class. -class Vector { - public: - - /// \brief Construct a PBQP vector of the given size. - explicit Vector(unsigned length) : - length(length), data(new PBQPNum[length]) { - } - - /// \brief Construct a PBQP vector with initializer. - Vector(unsigned length, PBQPNum initVal) : - length(length), data(new PBQPNum[length]) { - std::fill(data, data + length, initVal); - } - - /// \brief Copy construct a PBQP vector. - Vector(const Vector &v) : - length(v.length), data(new PBQPNum[length]) { - std::copy(v.data, v.data + length, data); - } - - /// \brief Destroy this vector, return its memory. - ~Vector() { delete[] data; } - - /// \brief Assignment operator. - Vector& operator=(const Vector &v) { - delete[] data; - length = v.length; - data = new PBQPNum[length]; - std::copy(v.data, v.data + length, data); - return *this; - } - - /// \brief Return the length of the vector - unsigned getLength() const { - return length; - } - - /// \brief Element access. - PBQPNum& operator[](unsigned index) { - assert(index < length && "Vector element access out of bounds."); - return data[index]; - } - - /// \brief Const element access. - const PBQPNum& operator[](unsigned index) const { - assert(index < length && "Vector element access out of bounds."); - return data[index]; - } - - /// \brief Add another vector to this one. - Vector& operator+=(const Vector &v) { - assert(length == v.length && "Vector length mismatch."); - std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); - return *this; - } - - /// \brief Subtract another vector from this one. 
- Vector& operator-=(const Vector &v) { - assert(length == v.length && "Vector length mismatch."); - std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); - return *this; - } - - /// \brief Returns the index of the minimum value in this vector - unsigned minIndex() const { - return std::min_element(data, data + length) - data; - } - - private: - unsigned length; - PBQPNum *data; -}; - -/// \brief Output a textual representation of the given vector on the given -/// output stream. -template <typename OStream> -OStream& operator<<(OStream &os, const Vector &v) { - assert((v.getLength() != 0) && "Zero-length vector badness."); - - os << "[ " << v[0]; - for (unsigned i = 1; i < v.getLength(); ++i) { - os << ", " << v[i]; - } - os << " ]"; - - return os; -} - - -/// \brief PBQP Matrix class -class Matrix { - public: - - /// \brief Construct a PBQP Matrix with the given dimensions. - Matrix(unsigned rows, unsigned cols) : - rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { - } - - /// \brief Construct a PBQP Matrix with the given dimensions and initial - /// value. - Matrix(unsigned rows, unsigned cols, PBQPNum initVal) : - rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { - std::fill(data, data + (rows * cols), initVal); - } - - /// \brief Copy construct a PBQP matrix. - Matrix(const Matrix &m) : - rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { - std::copy(m.data, m.data + (rows * cols), data); - } - - /// \brief Destroy this matrix, return its memory. - ~Matrix() { delete[] data; } - - /// \brief Assignment operator. - Matrix& operator=(const Matrix &m) { - delete[] data; - rows = m.rows; cols = m.cols; - data = new PBQPNum[rows * cols]; - std::copy(m.data, m.data + (rows * cols), data); - return *this; - } - - /// \brief Return the number of rows in this matrix. - unsigned getRows() const { return rows; } - - /// \brief Return the number of cols in this matrix. - unsigned getCols() const { return cols; } - - /// \brief Matrix element access. - PBQPNum* operator[](unsigned r) { - assert(r < rows && "Row out of bounds."); - return data + (r * cols); - } - - /// \brief Matrix element access. - const PBQPNum* operator[](unsigned r) const { - assert(r < rows && "Row out of bounds."); - return data + (r * cols); - } - - /// \brief Returns the given row as a vector. - Vector getRowAsVector(unsigned r) const { - Vector v(cols); - for (unsigned c = 0; c < cols; ++c) - v[c] = (*this)[r][c]; - return v; - } - - /// \brief Returns the given column as a vector. - Vector getColAsVector(unsigned c) const { - Vector v(rows); - for (unsigned r = 0; r < rows; ++r) - v[r] = (*this)[r][c]; - return v; - } - - /// \brief Reset the matrix to the given value. - Matrix& reset(PBQPNum val = 0) { - std::fill(data, data + (rows * cols), val); - return *this; - } - - /// \brief Set a single row of this matrix to the given value. - Matrix& setRow(unsigned r, PBQPNum val) { - assert(r < rows && "Row out of bounds."); - std::fill(data + (r * cols), data + ((r + 1) * cols), val); - return *this; - } - - /// \brief Set a single column of this matrix to the given value. - Matrix& setCol(unsigned c, PBQPNum val) { - assert(c < cols && "Column out of bounds."); - for (unsigned r = 0; r < rows; ++r) - (*this)[r][c] = val; - return *this; - } - - /// \brief Matrix transpose. 
- Matrix transpose() const { - Matrix m(cols, rows); - for (unsigned r = 0; r < rows; ++r) - for (unsigned c = 0; c < cols; ++c) - m[c][r] = (*this)[r][c]; - return m; - } - - /// \brief Returns the diagonal of the matrix as a vector. - /// - /// Matrix must be square. - Vector diagonalize() const { - assert(rows == cols && "Attempt to diagonalize non-square matrix."); - - Vector v(rows); - for (unsigned r = 0; r < rows; ++r) - v[r] = (*this)[r][r]; - return v; - } - - /// \brief Add the given matrix to this one. - Matrix& operator+=(const Matrix &m) { - assert(rows == m.rows && cols == m.cols && - "Matrix dimensions mismatch."); - std::transform(data, data + (rows * cols), m.data, data, - std::plus<PBQPNum>()); - return *this; - } - - /// \brief Returns the minimum of the given row - PBQPNum getRowMin(unsigned r) const { - assert(r < rows && "Row out of bounds"); - return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); - } - - /// \brief Returns the minimum of the given column - PBQPNum getColMin(unsigned c) const { - PBQPNum minElem = (*this)[0][c]; - for (unsigned r = 1; r < rows; ++r) - if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; - return minElem; - } - - /// \brief Subtracts the given scalar from the elements of the given row. - Matrix& subFromRow(unsigned r, PBQPNum val) { - assert(r < rows && "Row out of bounds"); - std::transform(data + (r * cols), data + ((r + 1) * cols), - data + (r * cols), - std::bind2nd(std::minus<PBQPNum>(), val)); - return *this; - } - - /// \brief Subtracts the given scalar from the elements of the given column. - Matrix& subFromCol(unsigned c, PBQPNum val) { - for (unsigned r = 0; r < rows; ++r) - (*this)[r][c] -= val; - return *this; - } - - /// \brief Returns true if this is a zero matrix. - bool isZero() const { - return find_if(data, data + (rows * cols), - std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) == - data + (rows * cols); - } - - private: - unsigned rows, cols; - PBQPNum *data; -}; - -/// \brief Output a textual representation of the given matrix on the given -/// output stream. -template <typename OStream> -OStream& operator<<(OStream &os, const Matrix &m) { - - assert((m.getRows() != 0) && "Zero-row matrix badness."); - - for (unsigned i = 0; i < m.getRows(); ++i) { - os << m.getRowAsVector(i); - } - - return os; -} - -} - -#endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h deleted file mode 100644 index 047fd04c7cb8..000000000000 --- a/lib/CodeGen/PBQP/Solution.h +++ /dev/null @@ -1,89 +0,0 @@ -//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// PBQP Solution class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H -#define LLVM_CODEGEN_PBQP_SOLUTION_H - -#include "Math.h" -#include "Graph.h" - -#include <map> - -namespace PBQP { - - /// \brief Represents a solution to a PBQP problem. - /// - /// To get the selection for each node in the problem use the getSelection method. 
- class Solution { - private: - - typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap; - SelectionsMap selections; - - unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions; - - public: - - /// \brief Number of nodes for which selections have been made. - /// @return Number of nodes for which selections have been made. - unsigned numNodes() const { return selections.size(); } - - /// \brief Records a reduction via the R0 rule. Should be called from the - /// solver only. - void recordR0() { ++r0Reductions; } - - /// \brief Returns the number of R0 reductions applied to solve the problem. - unsigned numR0Reductions() const { return r0Reductions; } - - /// \brief Records a reduction via the R1 rule. Should be called from the - /// solver only. - void recordR1() { ++r1Reductions; } - - /// \brief Returns the number of R1 reductions applied to solve the problem. - unsigned numR1Reductions() const { return r1Reductions; } - - /// \brief Records a reduction via the R2 rule. Should be called from the - /// solver only. - void recordR2() { ++r2Reductions; } - - /// \brief Returns the number of R2 reductions applied to solve the problem. - unsigned numR2Reductions() const { return r2Reductions; } - - /// \brief Records a reduction via the RN rule. Should be called from the - /// solver only. - void recordRN() { ++ rNReductions; } - - /// \brief Returns the number of RN reductions applied to solve the problem. - unsigned numRNReductions() const { return rNReductions; } - - /// \brief Set the selection for a given node. - /// @param nItr Node iterator. - /// @param selection Selection for nItr. - void setSelection(Graph::NodeItr nItr, unsigned selection) { - selections[nItr] = selection; - } - - /// \brief Get a node's selection. - /// @param nItr Node iterator. - /// @return The selection for nItr; - unsigned getSelection(Graph::NodeItr nItr) const { - SelectionsMap::const_iterator sItr = selections.find(nItr); - assert(sItr != selections.end() && "No selection for node."); - return sItr->second; - } - - }; - -} - -#endif // LLVM_CODEGEN_PBQP_SOLUTION_H diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index d4df4c548711..5f7cf582c960 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "phielim" -#include "PHIElimination.h" +#include "PHIEliminationUtils.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" @@ -34,23 +34,72 @@ #include <map> using namespace llvm; +namespace { + class PHIElimination : public MachineFunctionPass { + MachineRegisterInfo *MRI; // Machine register information + + public: + static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. + /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerAtomicPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. 
In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + LiveVariables &LV, MachineLoopInfo *MLI); + + typedef std::pair<unsigned, unsigned> BBVRegPair; + typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr*, 4> ImpDefs; + + // Map reusable lowered PHI node -> incoming join register. + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; + LoweredPHIMap LoweredPHIs; + }; +} + STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; INITIALIZE_PASS(PHIElimination, "phi-node-elimination", - "Eliminate PHI nodes for register allocation", false, false); + "Eliminate PHI nodes for register allocation", false, false) -char &llvm::PHIEliminationID = PHIElimination::ID; +char& llvm::PHIEliminationID = PHIElimination::ID; -void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { +void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } -bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { +bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); bool Changed = false; @@ -93,14 +142,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. /// -bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, +bool PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { if (MBB.empty() || !MBB.front().isPHI()) return false; // Quick exit for basic blocks without PHIs. // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). - MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin()); + MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); while (MBB.front().isPHI()) LowerAtomicPHINode(MBB, AfterPHIsIt); @@ -121,58 +170,14 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, return true; } -// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg -// when following the CFG edge to SuccMBB. This needs to be after any def of -// SrcReg, but before any subsequent point where control flow might jump out of -// the basic block. -MachineBasicBlock::iterator -llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, - MachineBasicBlock &SuccMBB, - unsigned SrcReg) { - // Handle the trivial case trivially. - if (MBB.empty()) - return MBB.begin(); - - // Usually, we just want to insert the copy before the first terminator - // instruction. However, for the edge going to a landing pad, we must insert - // the copy before the call/invoke instruction. 
- if (!SuccMBB.isLandingPad()) - return MBB.getFirstTerminator(); - - // Discover any defs/uses in this basic block. - SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *DefUseMI = &*RI; - if (DefUseMI->getParent() == &MBB) - DefUsesInMBB.insert(DefUseMI); - } - MachineBasicBlock::iterator InsertPoint; - if (DefUsesInMBB.empty()) { - // No defs. Insert the copy at the start of the basic block. - InsertPoint = MBB.begin(); - } else if (DefUsesInMBB.size() == 1) { - // Insert the copy immediately after the def/use. - InsertPoint = *DefUsesInMBB.begin(); - ++InsertPoint; - } else { - // Insert the copy immediately after the last def/use. - InsertPoint = MBB.end(); - while (!DefUsesInMBB.count(&*--InsertPoint)) {} - ++InsertPoint; - } - - // Make sure the copy goes after any phi nodes however. - return SkipPHIsAndLabels(MBB, InsertPoint); -} /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. /// -void llvm::PHIElimination::LowerAtomicPHINode( +void PHIElimination::LowerAtomicPHINode( MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt) { ++NumAtomic; @@ -207,7 +212,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( IncomingReg = entry; reusedIncoming = true; ++NumReused; - DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); + DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi); } else { const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); @@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). MachineBasicBlock::iterator InsertPos = - FindCopyInsertPoint(opBlock, MBB, SrcReg); + findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. if (!reusedIncoming && IncomingReg) @@ -335,6 +340,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( #ifndef NDEBUG for (MachineBasicBlock::iterator TI = llvm::next(Term); TI != opBlock.end(); ++TI) { + if (TI->isDebugValue()) + continue; assert(!TI->readsRegister(SrcReg) && "Terminator instructions cannot use virtual registers unless" "they are the first terminator in a block!"); @@ -343,9 +350,13 @@ void llvm::PHIElimination::LowerAtomicPHINode( } else if (reusedIncoming || !IncomingReg) { // We may have to rewind a bit if we didn't insert a copy this time. KillInst = Term; - while (KillInst != opBlock.begin()) - if ((--KillInst)->readsRegister(SrcReg)) + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) break; + } } else { // We just inserted this copy. KillInst = prior(InsertPos); @@ -371,7 +382,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. 
/// -void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) { +void PHIElimination::analyzePHINodes(const MachineFunction& MF) { for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); @@ -381,10 +392,10 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) { BBI->getOperand(i).getReg())]; } -bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, - MachineBasicBlock &MBB, - LiveVariables &LV, - MachineLoopInfo *MLI) { +bool PHIElimination::SplitPHIEdges(MachineFunction &MF, + MachineBasicBlock &MBB, + LiveVariables &LV, + MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. @@ -403,10 +414,14 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) { if (!MLI || !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) && - MLI->isLoopHeader(&MBB))) - Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0; + MLI->isLoopHeader(&MBB))) { + if (PreMBB->SplitCriticalEdge(&MBB, this)) { + Changed = true; + ++NumCriticalEdgesSplit; + } + } } } } - return true; + return Changed; } diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h deleted file mode 100644 index 45a97182e71c..000000000000 --- a/lib/CodeGen/PHIElimination.h +++ /dev/null @@ -1,115 +0,0 @@ -//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP -#define LLVM_CODEGEN_PHIELIMINATION_HPP - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -namespace llvm { - class LiveVariables; - class MachineRegisterInfo; - class MachineLoopInfo; - - /// Lower PHI instructions to copies. - class PHIElimination : public MachineFunctionPass { - MachineRegisterInfo *MRI; // Machine register information - - public: - static char ID; // Pass identification, replacement for typeid - PHIElimination() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - private: - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions - /// in predecessor basic blocks. - /// - bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); - - /// analyzePHINodes - Gather information about the PHI nodes in - /// here. In particular, we want to map the number of uses of a virtual - /// register which is used in a PHI node. We map that to the BB the - /// vreg is coming from. This is used later to determine when the vreg - /// is killed in the BB. - /// - void analyzePHINodes(const MachineFunction& Fn); - - /// Split critical edges where necessary for good coalescer performance. - bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI); - - /// SplitCriticalEdge - Split a critical edge from A to B by - /// inserting a new MBB. 
Update branches in A and PHI instructions - /// in B. Return the new block. - MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A, - MachineBasicBlock *B); - - /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from - /// SrcReg when following the CFG edge to SuccMBB. This needs to be after - /// any def of SrcReg, but before any subsequent point where control flow - /// might jump out of the basic block. - MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, - MachineBasicBlock &SuccMBB, - unsigned SrcReg); - - // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and - // also after any exception handling labels: in landing pads execution - // starts at the label, so any copies placed before it won't be executed! - // We also deal with DBG_VALUEs, which are a bit tricky: - // PHI - // DBG_VALUE - // LABEL - // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it - // needs to be annulled or, better, moved to follow the label, as well. - // PHI - // DBG_VALUE - // no label - // Here it is not a good idea to skip the DBG_VALUE. - // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and - // maximally stupid. - MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - // Rather than assuming that EH labels come before other kinds of labels, - // just skip all labels. - while (I != MBB.end() && - (I->isPHI() || I->isLabel() || I->isDebugValue())) { - if (I->isDebugValue() && I->getNumOperands()==3 && - I->getOperand(0).isReg()) - I->getOperand(0).setReg(0U); - ++I; - } - return I; - } - - typedef std::pair<unsigned, unsigned> BBVRegPair; - typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; - - VRegPHIUse VRegPHIUseCount; - - // Defs of PHI sources which are implicit_def. - SmallPtrSet<MachineInstr*, 4> ImpDefs; - - // Map reusable lowered PHI node -> incoming join register. - typedef DenseMap<MachineInstr*, unsigned, - MachineInstrExpressionTrait> LoweredPHIMap; - LoweredPHIMap LoweredPHIs; - }; - -} - -#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */ diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp new file mode 100644 index 000000000000..10bfdcce6769 --- /dev/null +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -0,0 +1,61 @@ +//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PHIEliminationUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg +// when following the CFG edge to SuccMBB. This needs to be after any def of +// SrcReg, but before any subsequent point where control flow might jump out of +// the basic block. +MachineBasicBlock::iterator +llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg) { + // Handle the trivial case trivially. + if (MBB->empty()) + return MBB->begin(); + + // Usually, we just want to insert the copy before the first terminator + // instruction. 
However, for the edge going to a landing pad, we must insert + // the copy before the call/invoke instruction. + if (!SuccMBB->isLandingPad()) + return MBB->getFirstTerminator(); + + // Discover any defs/uses in this basic block. + SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; + MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg), + RE = MRI.reg_end(); RI != RE; ++RI) { + MachineInstr* DefUseMI = &*RI; + if (DefUseMI->getParent() == MBB) + DefUsesInMBB.insert(DefUseMI); + } + + MachineBasicBlock::iterator InsertPoint; + if (DefUsesInMBB.empty()) { + // No defs. Insert the copy at the start of the basic block. + InsertPoint = MBB->begin(); + } else if (DefUsesInMBB.size() == 1) { + // Insert the copy immediately after the def/use. + InsertPoint = *DefUsesInMBB.begin(); + ++InsertPoint; + } else { + // Insert the copy immediately after the last def/use. + InsertPoint = MBB->end(); + while (!DefUsesInMBB.count(&*--InsertPoint)) {} + ++InsertPoint; + } + + // Make sure the copy goes after any phi nodes however. + return MBB->SkipPHIsAndLabels(InsertPoint); +} diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h new file mode 100644 index 000000000000..9ac47fb4c505 --- /dev/null +++ b/lib/CodeGen/PHIEliminationUtils.h @@ -0,0 +1,25 @@ +//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H +#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H + +#include "llvm/CodeGen/MachineBasicBlock.h" + +namespace llvm { + /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. 
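The findPHICopyInsertPoint utility above picks the position for the copy that replaces a PHI source: on a normal edge it goes just before the predecessor's first terminator, but on an edge into a landing pad it must instead go after the last def or use of the source register in that block (or at the start of the block if there is none), and in any case after leading PHIs and labels. A simplified standalone model of that placement rule over a flat instruction list (the Instr struct and its flags are invented for illustration, not LLVM types):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// A toy stand-in for a MachineInstr: just enough flags to model the
// insertion-point rules used by findPHICopyInsertPoint.
struct Instr {
  std::string name;
  bool isPHI = false;
  bool isLabel = false;
  bool isTerminator = false;
  bool touchesSrcReg = false; // def or use of the register being copied
};

// Returns the index at which the PHI copy should be inserted.
static std::size_t findCopyInsertPoint(const std::vector<Instr> &block,
                                       bool succIsLandingPad) {
  if (block.empty())
    return 0;

  if (!succIsLandingPad) {
    // Normal edge: insert before the first terminator.
    for (std::size_t i = 0; i < block.size(); ++i)
      if (block[i].isTerminator)
        return i;
    return block.size();
  }

  // Landing-pad edge: insert after the last def/use of the source register,
  // or at the start of the block if there is none.
  std::size_t point = 0;
  for (std::size_t i = 0; i < block.size(); ++i)
    if (block[i].touchesSrcReg)
      point = i + 1;

  // Never insert before leading PHIs or labels.
  while (point < block.size() &&
         (block[point].isPHI || block[point].isLabel))
    ++point;
  return point;
}

int main() {
  std::vector<Instr> block = {
      {"PHI", true},
      {"label", false, true},
      {"def %src", false, false, false, true},
      {"add", false},
      {"invoke-like call", false, false, true}};

  std::cout << "normal edge: insert at index "
            << findCopyInsertPoint(block, false) << "\n"; // 4, before the terminator
  std::cout << "landing-pad edge: insert at index "
            << findCopyInsertPoint(block, true) << "\n"; // 3, after 'def %src'
}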
+ MachineBasicBlock::iterator + findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg); +} + +#endif diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 17cee46ca16c..5d7123caa017 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -41,7 +41,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -50,8 +52,13 @@ static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization")); +static cl::opt<bool> +DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), + cl::desc("Disable the peephole optimizer")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumImmFold, "Number of move immediate foled"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -62,7 +69,9 @@ namespace { public: static char ID; // Pass identification - PeepholeOptimizer() : MachineFunctionPass(ID) {} + PeepholeOptimizer() : MachineFunctionPass(ID) { + initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -79,12 +88,21 @@ namespace { bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); + bool isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); }; } char PeepholeOptimizer::ID = 0; -INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts", - "Peephole Optimizations", false, false); +INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) FunctionPass *llvm::createPeepholeOptimizerPass() { return new PeepholeOptimizer(); @@ -102,12 +120,10 @@ FunctionPass *llvm::createPeepholeOptimizerPass() { bool PeepholeOptimizer:: OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { - LocalMIs.insert(MI); - unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; - + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; @@ -232,22 +248,17 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB) { + MachineBasicBlock *MBB){ // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. 
unsigned SrcReg; - int CmpValue; - if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0) - return false; - - MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); - if (llvm::next(DI) != MRI->def_end()) - // Only support one definition. + int CmpMask, CmpValue; + if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; - // Attempt to convert the defining instruction to set the "zero" flag. - if (TII->ConvertToSetZeroFlag(&*DI, MI)) { + // Attempt to optimize the comparison instruction. + if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { ++NumEliminated; return true; } @@ -255,7 +266,53 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, return false; } +bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isMoveImmediate()) + return false; + if (TID.getNumDefs() != 1) + return false; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + ImmDefMIs.insert(std::make_pair(Reg, MI)); + ImmDefRegs.insert(Reg); + return true; + } + + return false; +} + +/// FoldImmediate - Try folding register operands that are defined by move +/// immediate instructions, i.e. a trivial constant folding optimization, if +/// and only if the def and use are in the same BB. +bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (ImmDefRegs.count(Reg) == 0) + continue; + DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); + assert(II != ImmDefMIs.end()); + if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { + ++NumImmFold; + return true; + } + } + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePeephole) + return false; + TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); @@ -264,22 +321,50 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; SmallPtrSet<MachineInstr*, 8> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; + + bool SeenMoveImm = false; LocalMIs.clear(); + ImmDefRegs.clear(); + ImmDefMIs.clear(); + bool First = true; + MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator - MII = I->begin(), ME = I->end(); MII != ME; ) { + MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + LocalMIs.insert(MI); - if (MI->getDesc().isCompare() && - !MI->getDesc().hasUnmodeledSideEffects()) { - ++MII; // The iterator may become invalid if the compare is deleted. - Changed |= OptimizeCmpInstr(MI, MBB); + if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || + MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || + MI->hasUnmodeledSideEffects()) { + ++MII; + continue; + } + + if (MI->getDesc().isCompare()) { + if (OptimizeCmpInstr(MI, MBB)) { + // MI is deleted. 
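OptimizeCmpInstr above now delegates to the target hook OptimizeCompareInstr, whose typical job is to prove that the instruction defining the compared register already sets the condition flags the compare would set, so the compare itself can be erased. A toy, target-independent model of that idea (the mini-ISA, its flag-setting rules, and eliminateRedundantCmp are invented for illustration and are not how any particular target implements the hook):

#include <iostream>
#include <string>
#include <vector>

// Toy instruction stream. In this mini-ISA, "subs" and "ands" set the zero
// flag as a side effect; "cmp0 r" only compares r against zero.
struct ToyInst {
  std::string op;
  std::string dst; // defined register ("" for cmp0)
  std::string src; // compared register, only used by cmp0
};

// Returns true (and removes the compare) if the register compared against
// zero is defined, within the same block, by a flag-setting instruction.
static bool eliminateRedundantCmp(std::vector<ToyInst> &block,
                                  std::size_t cmpIdx) {
  const std::string &reg = block[cmpIdx].src;
  for (std::size_t i = cmpIdx; i-- > 0;) {
    if (block[i].dst != reg)
      continue;
    if (block[i].op == "subs" || block[i].op == "ands") {
      block.erase(block.begin() + cmpIdx);
      return true;
    }
    return false; // defining instruction does not set flags
  }
  return false; // no def found in this block
}

int main() {
  std::vector<ToyInst> block = {{"subs", "r1", ""},
                                {"mov", "r2", ""},
                                {"cmp0", "", "r1"}};
  std::cout << "compare eliminated: " << eliminateRedundantCmp(block, 2)
            << "\n"; // 1 -- r1 was defined by the flag-setting "subs"
}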
+ Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; + } + } + + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { + SeenMoveImm = true; } else { Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); - ++MII; + if (SeenMoveImm) + Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } + + First = false; + PMII = MII; + ++MII; } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f0bd6d1372be..60c24b710792 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -133,18 +133,12 @@ namespace { std::vector<unsigned> KillIndices; public: - SchedulePostRATDList(MachineFunction &MF, - const MachineLoopInfo &MLI, - const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR, - AntiDepBreaker *ADB, - AliasAnalysis *aa) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa), - KillIndices(TRI->getNumRegs()) {} - - ~SchedulePostRATDList() { - } + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); + + ~SchedulePostRATDList(); /// StartBlock - Initialize register live-range state for scheduling in /// this block. @@ -183,9 +177,34 @@ namespace { }; } +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), + KillIndices(TRI->getNumRegs()) +{ + const TargetMachine &TM = MF.getTarget(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + HazardRec = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + AntiDepBreak = + ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - AA = &getAnalysis<AliasAnalysis>(); TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return false; } else { // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. 
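The new isMoveImmediate/FoldImmediate pair in the peephole pass above performs a very local constant propagation: while scanning a block it remembers virtual registers defined by move-immediate instructions and, once one has been seen, asks the target to rewrite later uses of those registers in the same block into immediate operands. A target-independent sketch of the same bookkeeping over a toy instruction stream (the Inst/Operand representation and foldImmediates are invented for illustration; the real pass defers the actual rewrite to TargetInstrInfo::FoldImmediate):

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

// Toy instruction: either a move-immediate "dst = imm" or an ordinary
// instruction whose sources are registers or already-folded immediates.
struct Operand {
  std::string reg;          // empty once folded to an immediate
  std::optional<int> imm;
};

struct Inst {
  bool isMoveImm = false;
  std::string dst;
  int imm = 0;              // only meaningful for move-immediate
  std::vector<Operand> srcs;
};

// Mirrors the pass structure: record move-immediate defs, then rewrite later
// uses in the same block into immediate operands.
static unsigned foldImmediates(std::vector<Inst> &block) {
  std::map<std::string, int> immDefs; // reg -> immediate value
  unsigned numFolded = 0;
  for (Inst &I : block) {
    if (I.isMoveImm) {
      immDefs[I.dst] = I.imm;
      continue;
    }
    for (Operand &Op : I.srcs) {
      auto It = immDefs.find(Op.reg);
      if (Op.imm || It == immDefs.end())
        continue;
      Op.imm = It->second; // fold: the register use becomes an immediate
      Op.reg.clear();
      ++numFolded;
    }
    // A redefinition of a register invalidates any recorded immediate.
    immDefs.erase(I.dst);
  }
  return numFolded;
}

int main() {
  std::vector<Inst> block = {
      {true, "v1", 42, {}},                        // v1 = 42
      {false, "v2", 0, {{"v1", {}}, {"v0", {}}}},  // v2 = v1 + v0
  };
  std::cout << "folded operands: " << foldImmediates(block) << "\n"; // 1
}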
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; @@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - const TargetMachine &TM = Fn.getTarget(); - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - ScheduleHazardRecognizer *HR = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); - AntiDepBreaker *ADB = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); - - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + CriticalPathRCs); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.FixupKills(MBB); } - delete HR; - delete ADB; - return true; } @@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(dbgs() << "\n*** Examining Available\n"; - LatencyPriorityQueue q = AvailableQueue; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(this); - }); + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); SUnit *FoundSUnit = 0; bool HasNoopHazards = false; @@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue.pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index cd9d83eeb684..d6e31dae9d13 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -91,8 +91,9 @@ namespace { public: static char ID; - PreAllocSplitting() - : MachineFunctionPass(ID) {} + PreAllocSplitting() : MachineFunctionPass(ID) { + initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -106,10 +107,8 @@ namespace { AU.addPreserved<LiveStacks>(); AU.addPreserved<RegisterCoalescer>(); AU.addPreserved<CalculateSpillWeights>(); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); @@ -203,9 +202,18 @@ namespace { char PreAllocSplitting::ID = 0; -INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting", +INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting", + "Pre-Register Allocation Live Interval Splitting", + false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 
+INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting", - false, false); + false, false) char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; @@ -324,7 +332,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false, + CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, LSs->getVNInfoAllocator()); return SS; } @@ -585,7 +593,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us SlotIndex StartIndex = LIs->getMBBStartIdx(MBB); VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false, + LI->getNextValue(SlotIndex(), /*FIXME*/ 0, LIs->getVNInfoAllocator()); if (!IsIntraBlock) LiveOut[MBB] = RetVNI; @@ -674,7 +682,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DefIdx = DefIdx.getDefIndex(); assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); + VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); // If the def is a move, set the copy field. if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) @@ -807,7 +815,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) + if (!DefMI || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); else KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); @@ -872,7 +880,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false, + CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, LSs->getVNInfoAllocator()); } @@ -967,8 +975,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); - MachineInstr *DefMI = ValNo->isDefAccurate() - ? LIs->getInstructionFromIndex(ValNo->def) : NULL; + MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def); // If this would create a new join point, do not split. if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { @@ -1005,7 +1012,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SlotIndex SpillIndex; MachineInstr *SpillMI = NULL; int SS = -1; - if (!ValNo->isDefAccurate()) { + if (!DefMI) { // If we don't know where the def is we must split just before the barrier. if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, BarrierMBB, SS, RefsInMBB))) { @@ -1199,12 +1206,12 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { // We also don't try to handle the results of PHI joins, since there's // no defining instruction to analyze. - if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue; + MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); + if (!DefMI || CurrVN->isUnused()) continue; // We're only interested in eliminating cruft introduced by the splitter, // is of the form load-use or load-use-store. First, check that the // definition is a load, and remember what stack slot we loaded it from. 
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); int FrameIndex; if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b8831db1d118..9cd9941e56b3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,8 +26,11 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; -INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs", - "Process Implicit Definitions.", false, false); +INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e2802c1fdf4a..ad7b6e4aa97f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -21,6 +21,7 @@ #define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -29,7 +30,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -44,8 +45,12 @@ using namespace llvm; char PEI::ID = 0; -INITIALIZE_PASS(PEI, "prologepilog", - "Prologue/Epilogue Insertion", false, false); +INITIALIZE_PASS_BEGIN(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); @@ -61,6 +66,8 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -71,7 +78,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. @@ -91,7 +98,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TRI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn); // Calculate actual frame offsets for all abstract stack objects... 
calculateFrameObjectOffsets(Fn); @@ -138,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; @@ -165,7 +173,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { // Some inline asm's need a stack frame, as indicated by operand 1. - if (I->getOperand(1).getImm()) + unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) AdjustsStack = true; } @@ -180,7 +189,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { // the target doesn't indicate otherwise, remove the call frame pseudos // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. - if (RegInfo->canSimplifyCallFramePseudos(Fn)) + if (TFI->canSimplifyCallFramePseudos(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -190,7 +199,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { /// registers. void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... @@ -229,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const TargetFrameInfo::SpillSlot *FixedSpillSlots = + const TargetFrameLowering::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate @@ -247,7 +256,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && FixedSlot->Reg != Reg) ++FixedSlot; @@ -290,13 +299,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; if (! ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); - if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. @@ -328,7 +338,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that preceed it. 
- if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -480,10 +490,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -549,7 +559,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { + if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -631,17 +641,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } - if (!RegInfo->targetHandlesStackFrameRounding()) { + if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has @@ -672,16 +682,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { /// prolog and epilog code to the function. /// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); // Add prologue to the function... 
- TRI->emitPrologue(Fn); + TFI.emitPrologue(Fn); // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue if (!I->empty() && I->back().getDesc().isReturn()) - TRI->emitEpilogue(Fn, *I); + TFI.emitEpilogue(Fn, *I); } } @@ -694,9 +704,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetFrameInfo *TFI = TM.getFrameInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = - TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); @@ -755,8 +765,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - TRI.eliminateFrameIndex(MI, SPAdj, - FrameIndexVirtualScavenging ? NULL : RS); + TRI.eliminateFrameIndex(MI, SPAdj, + FrameIndexVirtualScavenging ? NULL : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -825,7 +835,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { ScratchReg = RS->scavengeRegister(RC, I, SPAdj); ++NumScavengedRegs; } - // replace this reference to the virtual register with the + // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); MI->getOperand(i).setReg(ScratchReg); diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index d575124a6b3e..e2391591ad06 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -36,7 +36,9 @@ namespace llvm { class PEI : public MachineFunctionPass { public: static char ID; - PEI() : MachineFunctionPass(ID) {} + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { return "Prolog/Epilog Insertion & Frame Finalization"; diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 5e86e5a9447e..73b66d868f3d 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -18,7 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h" #include <map> using namespace llvm; diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h new file mode 100644 index 000000000000..8c7e5f53b824 --- /dev/null +++ b/lib/CodeGen/RegAllocBase.h @@ -0,0 +1,181 @@ +//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class, which is the skeleton of a basic +// register allocation algorithm and interface for extending it. 
It provides the +// building blocks on which to construct other experimental allocators and test +// the validity of two principles: +// +// - If virtual and physical register liveness is modeled using intervals, then +// on-the-fly interference checking is cheap. Furthermore, interferences can be +// lazily cached and reused. +// +// - Register allocation complexity, and generated code performance is +// determined by the effectiveness of live range splitting rather than optimal +// coloring. +// +// Following the first principle, interfering checking revolves around the +// LiveIntervalUnion data structure. +// +// To fulfill the second principle, the basic allocator provides a driver for +// incremental splitting. It essentially punts on the problem of register +// coloring, instead driving the assignment of virtual to physical registers by +// the cost of splitting. The basic allocator allows for heuristic reassignment +// of registers, if a more sophisticated allocator chooses to do that. +// +// This framework provides a way to engineer the compile time vs. code +// quality trade-off without relying on a particular theoretical solver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCBASE +#define LLVM_CODEGEN_REGALLOCBASE + +#include "llvm/ADT/OwningPtr.h" +#include "LiveIntervalUnion.h" +#include <queue> + +namespace llvm { + +template<typename T> class SmallVectorImpl; +class TargetRegisterInfo; +class VirtRegMap; +class LiveIntervals; +class Spiller; + +// Forward declare a priority queue of live virtual registers. If an +// implementation needs to prioritize by anything other than spill weight, then +// this will become an abstract base class with virtual calls to push/get. +class LiveVirtRegQueue; + +/// RegAllocBase provides the register allocation driver and interface that can +/// be extended to add interesting heuristics. +/// +/// Register allocators must override the selectOrSplit() method to implement +/// live range splitting. They may also override getPriority() which otherwise +/// defaults to the spill weight computed by CalculateSpillWeights. +class RegAllocBase { + LiveIntervalUnion::Allocator UnionAllocator; +protected: + // Array of LiveIntervalUnions indexed by physical register. + class LiveUnionArray { + unsigned NumRegs; + LiveIntervalUnion *Array; + public: + LiveUnionArray(): NumRegs(0), Array(0) {} + ~LiveUnionArray() { clear(); } + + unsigned numRegs() const { return NumRegs; } + + void init(LiveIntervalUnion::Allocator &, unsigned NRegs); + + void clear(); + + LiveIntervalUnion& operator[](unsigned PhysReg) { + assert(PhysReg < NumRegs && "physReg out of bounds"); + return Array[PhysReg]; + } + }; + + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + LiveUnionArray PhysReg2LiveUnion; + + // Current queries, one per physreg. They must be reinitialized each time we + // query on a new live virtual register. + OwningArrayPtr<LiveIntervalUnion::Query> Queries; + + RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {} + + virtual ~RegAllocBase() {} + + // A RegAlloc pass should call this before allocatePhysRegs. + void init(VirtRegMap &vrm, LiveIntervals &lis); + + // Get an initialized query to check interferences between lvr and preg. Note + // that Query::init must be called at least once for each physical register + // before querying a new live virtual register. This ties Queries and + // PhysReg2LiveUnion together. 
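The principle stated in the header above, that interval-based liveness makes on-the-fly interference checking cheap, comes down to overlap tests between sorted segment lists. The following is an illustrative sketch with simplified stand-in types, not the LiveIntervalUnion/Query API from this commit: one sorted, disjoint segment list per physical register, and a query that walks a candidate virtual register's segments against it.

#include <algorithm>
#include <vector>

// Simplified stand-ins, for illustration only.
struct Segment { unsigned Start, End; };              // half-open [Start, End)

// All segments already assigned to one physical register, sorted and disjoint.
struct SimpleLiveUnion {
  std::vector<Segment> Segs;

  // True if any segment of VirtReg overlaps a segment already in the union.
  bool checkInterference(const std::vector<Segment> &VirtReg) const {
    for (const Segment &S : VirtReg) {
      // First union segment that does not end before S begins.
      auto It = std::lower_bound(Segs.begin(), Segs.end(), S,
                                 [](const Segment &A, const Segment &B) {
                                   return A.End <= B.Start;
                                 });
      if (It != Segs.end() && It->Start < S.End)
        return true;                                  // overlap found
    }
    return false;
  }
};

Each query amounts to a handful of binary searches, which is why the class keeps one cached Query per physical register instead of recomputing interference from scratch.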
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { + Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]); + return Queries[PhysReg]; + } + + // The top-level driver. The output is a VirtRegMap that us updated with + // physical register assignments. + // + // If an implementation wants to override the LiveInterval comparator, we + // should modify this interface to allow passing in an instance derived from + // LiveVirtRegQueue. + void allocatePhysRegs(); + + // Get a temporary reference to a Spiller instance. + virtual Spiller &spiller() = 0; + + // getPriority - Calculate the allocation priority for VirtReg. + // Virtual registers with higher priorities are allocated first. + virtual float getPriority(LiveInterval *LI) = 0; + + // A RegAlloc pass should override this to provide the allocation heuristics. + // Each call must guarantee forward progess by returning an available PhysReg + // or new set of split live virtual registers. It is up to the splitter to + // converge quickly toward fully spilled live ranges. + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; + + // A RegAlloc pass should call this when PassManager releases its memory. + virtual void releaseMemory(); + + // Helper for checking interference between a live virtual register and a + // physical register, including all its register aliases. If an interference + // exists, return the interfering register, which may be preg or an alias. + unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg); + + /// assign - Assign VirtReg to PhysReg. + /// This should not be called from selectOrSplit for the current register. + void assign(LiveInterval &VirtReg, unsigned PhysReg); + + /// unassign - Undo a previous assignment of VirtReg to PhysReg. + /// This can be invoked from selectOrSplit, but be careful to guarantee that + /// allocation is making progress. + void unassign(LiveInterval &VirtReg, unsigned PhysReg); + + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. + bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + /// addMBBLiveIns - Add physreg liveins to basic blocks. + void addMBBLiveIns(MachineFunction *); + +#ifndef NDEBUG + // Verify each LiveIntervalUnion. + void verify(); +#endif + + // Use this group name for NamedRegionTimer. + static const char *TimerGroupName; + +public: + /// VerifyEnabled - True when -verify-regalloc is given. + static bool VerifyEnabled; + +private: + void seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> >&); + + void spillReg(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); +}; + +} // end namespace llvm + +#endif // !defined(LLVM_CODEGEN_REGALLOCBASE) diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp new file mode 100644 index 000000000000..045c8db9dadb --- /dev/null +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -0,0 +1,523 @@ +//===-- RegAllocBasic.cpp - basic register allocator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
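Before the RegAllocBasic.cpp listing continues, it is worth spelling out the selectOrSplit() contract described in the header above: each call must either return an available physical register, or return 0 after appending newly split or spilled live ranges, so the driver's queue always makes progress. A hypothetical minimal subclass, shown only to illustrate that contract (pass boilerplate and registration omitted; this is not part of the commit):

// Illustrative sketch: an allocator that never assigns a register and always
// spills, which still satisfies the forward-progress contract.
class RAAlwaysSpill : public RegAllocBase {
  std::auto_ptr<Spiller> SpillerInstance;
public:
  virtual Spiller &spiller() { return *SpillerInstance; }
  virtual float getPriority(LiveInterval *LI) { return LI->weight; }

  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<LiveInterval*> &SplitVRegs) {
    // Spill the requesting register itself. The spiller appends whatever new
    // intervals it creates to SplitVRegs so allocatePhysRegs() can re-queue
    // them; returning 0 tells the driver not to assign anything this round.
    SmallVector<LiveInterval*, 1> PendingSpills;
    spiller().spill(&VirtReg, SplitVRegs, PendingSpills);
    return 0;
  }
};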
+// +//===----------------------------------------------------------------------===// +// +// This file defines the RABasic function pass, which provides a minimal +// implementation of the basic register allocator. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveIntervalUnion.h" +#include "RegAllocBase.h" +#include "RenderMachineFunction.h" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Function.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +#include <cstdlib> + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", + createBasicRegisterAllocator); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt<bool, true> +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +namespace { +/// RABasic provides a minimal implementation of the basic register allocation +/// algorithm. It prioritizes live virtual registers by spill weight and spills +/// whenever a register is unavailable. This is not practical in production but +/// provides a useful baseline both for measuring other allocators and comparing +/// the speed of the basic algorithm against other styles of allocators. +class RABasic : public MachineFunctionPass, public RegAllocBase +{ + // context + MachineFunction *MF; + BitVector ReservedRegs; + + // analyses + LiveStacks *LS; + RenderMachineFunction *RMF; + + // state + std::auto_ptr<Spiller> SpillerInstance; + +public: + RABasic(); + + /// Return the pass name. + virtual const char* getPassName() const { + return "Basic Register Allocator"; + } + + /// RABasic analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + virtual Spiller &spiller() { return *SpillerInstance; } + + virtual float getPriority(LiveInterval *LI) { return LI->weight; } + + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + /// Perform register allocation. 
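As a usage aside: the static RegisterRegAlloc hook registered above is what exposes the pass by name. Assuming the standard llc option for choosing a register allocator, the two allocators added by this commit would be selected with something like:

llc -regalloc=basic input.ll
llc -regalloc=greedy input.ll

The option spelling comes from the usual RegisterRegAlloc mechanism, not from anything introduced in this diff.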
+ virtual bool runOnMachineFunction(MachineFunction &mf); + + static char ID; +}; + +char RABasic::ID = 0; + +} // end anonymous namespace + +RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); +} + +void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequiredID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + DEBUG(AU.addRequired<RenderMachineFunction>()); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RABasic::releaseMemory() { + SpillerInstance.reset(0); + RegAllocBase::releaseMemory(); +} + +#ifndef NDEBUG +// Verify each LiveIntervalUnion. +void RegAllocBase::verify() { + LiveVirtRegBitSet VisitedVRegs; + OwningArrayPtr<LiveVirtRegBitSet> + unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); + + // Verify disjoint unions. + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); + LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; + PhysReg2LiveUnion[PhysReg].verify(VRegs); + // Union + intersection test could be done efficiently in one pass, but + // don't add a method to SparseBitVector unless we really need it. + assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); + VisitedVRegs |= VRegs; + } + + // Verify vreg coverage. + for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); + liItr != liEnd; ++liItr) { + unsigned reg = liItr->first; + if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; + if (!VRM->hasPhys(reg)) continue; // spilled? + unsigned PhysReg = VRM->getPhys(reg); + if (!unionVRegs[PhysReg].test(reg)) { + dbgs() << "LiveVirtReg " << reg << " not in union " << + TRI->getName(PhysReg) << "\n"; + llvm_unreachable("unallocated live vreg"); + } + } + // FIXME: I'm not sure how to verify spilled intervals. +} +#endif //!NDEBUG + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Instantiate a LiveIntervalUnion for each physical register. 
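The LiveUnionArray::init() and clear() implementations that follow construct the per-physreg unions in place over raw storage, because LiveIntervalUnion takes constructor arguments and therefore cannot be created with a plain array new. A self-contained illustration of that idiom, using an invented Widget type rather than LLVM classes:

#include <cstdlib>
#include <new>

struct Widget {
  unsigned Id;
  explicit Widget(unsigned Id) : Id(Id) {}     // no default constructor
};

// Construct N Widgets in place over malloc'ed storage, the same pattern
// LiveUnionArray::init() uses for LiveIntervalUnion (error handling omitted).
Widget *makeArray(unsigned N) {
  Widget *A = static_cast<Widget*>(std::malloc(sizeof(Widget) * N));
  for (unsigned i = 0; i != N; ++i)
    new (A + i) Widget(i);                      // placement new
  return A;
}

// Tear down symmetrically: run each destructor, then release the storage,
// mirroring LiveUnionArray::clear().
void destroyArray(Widget *A, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    A[i].~Widget();
  std::free(A);
}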
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, + unsigned NRegs) { + NumRegs = NRegs; + Array = + static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); + for (unsigned r = 0; r != NRegs; ++r) + new(Array + r) LiveIntervalUnion(r, allocator); +} + +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { + NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); +} + +void RegAllocBase::LiveUnionArray::clear() { + if (!Array) + return; + for (unsigned r = 0; r != NumRegs; ++r) + Array[r].~LiveIntervalUnion(); + free(Array); + NumRegs = 0; + Array = 0; +} + +void RegAllocBase::releaseMemory() { + PhysReg2LiveUnion.clear(); +} + +// Visit all the live virtual registers. If they are already assigned to a +// physical register, unify them with the corresponding LiveIntervalUnion, +// otherwise push them on the priority queue for later assignment. +void RegAllocBase:: +seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> > &VirtRegQ) { + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { + unsigned RegNum = I->first; + LiveInterval &VirtReg = *I->second; + if (TargetRegisterInfo::isPhysicalRegister(RegNum)) + PhysReg2LiveUnion[RegNum].unify(VirtReg); + else + VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum)); + } +} + +void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << '\n'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + PhysReg2LiveUnion[PhysReg].unify(VirtReg); + ++NumAssigned; +} + +void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << '\n'); + assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); + PhysReg2LiveUnion[PhysReg].extract(VirtReg); + VRM->clearVirt(VirtReg.reg); + ++NumUnassigned; +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + + // Push each vreg onto a queue or "precolor" by adding it to a physreg union. + std::priority_queue<std::pair<float, unsigned> > VirtRegQ; + seedLiveVirtRegs(VirtRegQ); + + // Continue assigning vregs one at a time to available physical registers. + while (!VirtRegQ.empty()) { + // Pop the highest priority vreg. + LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second); + VirtRegQ.pop(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. 
+ DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName() + << ':' << VirtReg << '\n'); + typedef SmallVector<LiveInterval*, 4> VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs); + + if (AvailablePhysReg) + assign(VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval* SplitVirtReg = *I; + if (SplitVirtReg->empty()) continue; + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg), + SplitVirtReg->reg)); + ++NumNewQueued; + } + } +} + +// Check if this live virtual register interferes with a physical register. If +// not, then check for interference on each register that aliases with the +// physical register. Return the interfering register. +unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + if (query(VirtReg, *AliasI).checkInterference()) + return *AliasI; + return 0; +} + +// Helper for spillInteferences() that spills all interfering vregs currently +// assigned to this physical register. +void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); + assert(Q.seenAllInterferences() && "need collectInterferences()"); + const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs(); + + for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(), + E = PendingSpills.end(); I != E; ++I) { + LiveInterval &SpilledVReg = **I; + DEBUG(dbgs() << "extracting from " << + TRI->getName(PhysReg) << " " << SpilledVReg << '\n'); + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + unassign(SpilledVReg, PhysReg); + + // Spill the extracted interval. + spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills); + } + // After extracting segments, the query's results are invalid. But keep the + // contents valid until we're done accessing pendingSpills. + Q.clear(); +} + +// Spill or split all live virtual registers currently unified under PhysReg +// that interfere with VirtReg. The newly spilled or split live intervals are +// returned by appending them to SplitVRegs. +bool +RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Record each interference and determine if all are spillable before mutating + // either the union or live intervals. + unsigned NumInterferences = 0; + // Collect interferences assigned to any alias of the physical register. + for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { + LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); + NumInterferences += QAlias.collectInterferingVRegs(); + if (QAlias.seenUnspillableVReg()) { + return false; + } + } + DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << + " interferences with " << VirtReg << "\n"); + assert(NumInterferences > 0 && "expect interference"); + + // Spill each interfering vreg allocated to PhysReg or an alias. 
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + spillReg(VirtReg, *AliasI, SplitVRegs); + return true; +} + +// Add newly allocated physical registers to the MBB live in sets. +void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { + NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); + typedef SmallVector<MachineBasicBlock*, 8> MBBVec; + MBBVec liveInMBBs; + MachineBasicBlock &entryMBB = *MF->begin(); + + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; + if (LiveUnion.empty()) + continue; + for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); + ++SI) { + + // Find the set of basic blocks which this range is live into... + liveInMBBs.clear(); + if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; + + // And add the physreg for this interval to their live-in sets. + for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end(); + I != E; ++I) { + MachineBasicBlock *MBB = *I; + if (MBB == &entryMBB) continue; + if (MBB->isLiveIn(PhysReg)) continue; + MBB->addLiveIn(PhysReg); + } + } + } +} + + +//===----------------------------------------------------------------------===// +// RABasic Implementation +//===----------------------------------------------------------------------===// + +// Driver for the register assignment and splitting heuristics. +// Manages iteration over the LiveIntervalUnions. +// +// This is a minimal implementation of register assignment and splitting that +// spills whenever we run out of registers. +// +// selectOrSplit can only be called once per live virtual register. We then do a +// single interference test for each register the correct class until we find an +// available register. So, the number of interference tests in the worst case is +// |vregs| * |machineregs|. And since the number of interference tests is +// minimal, there is no value in caching them outside the scope of +// selectOrSplit(). +unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Populate a list of physical register spill candidates. + SmallVector<unsigned, 8> PhysRegSpillCands; + + // Check for an available register in this class. + const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg); + + for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF), + E = TRC->allocation_order_end(*MF); + I != E; ++I) { + + unsigned PhysReg = *I; + if (ReservedRegs.test(PhysReg)) continue; + + // Check interference and as a side effect, intialize queries for this + // VirtReg and its aliases. + unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); + if (interfReg == 0) { + // Found an available register. + return PhysReg; + } + LiveInterval *interferingVirtReg = + Queries[interfReg].firstInterference().liveUnionPos().value(); + + // The current VirtReg must either be spillable, or one of its interferences + // must have less spill weight. + if (interferingVirtReg->weight < VirtReg.weight ) { + PhysRegSpillCands.push_back(PhysReg); + } + } + // Try to spill another interfering reg with less spill weight. 
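To make the heuristic concrete before the eviction loop that follows: RABasic walks the allocation order once, takes the first interference-free register, and otherwise remembers registers whose queried interference is lighter than the register being allocated. A toy model of that decision rule, with invented types and no connection to the real TargetRegisterClass iteration:

#include <vector>

// One candidate physreg in allocation order. OccupantWeight < 0 means the
// register is free; otherwise it is the spill weight of the occupying vreg.
struct Candidate { unsigned PhysReg; float OccupantWeight; };

// Return the physreg to use, or 0 meaning "spill the requesting vreg itself".
unsigned pickPhysReg(float VirtRegWeight, const std::vector<Candidate> &Order) {
  std::vector<unsigned> SpillCands;
  for (const Candidate &C : Order) {
    if (C.OccupantWeight < 0)
      return C.PhysReg;                        // free register: done
    if (C.OccupantWeight < VirtRegWeight)
      SpillCands.push_back(C.PhysReg);         // cheaper to evict the occupant
  }
  // RABasic then tries to spill the occupants of each candidate in turn; here
  // we just report the first one, and 0 if every occupant was heavier.
  return SpillCands.empty() ? 0 : SpillCands.front();
}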
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; + + assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 && + "Interference after spill."); + // Tell the caller to allocate to this newly freed physical register. + return *PhysRegI; + } + // No other spill candidates were found, so spill the current VirtReg. + DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); + SmallVector<LiveInterval*, 1> pendingSpills; + + spiller().spill(&VirtReg, SplitVRegs, pendingSpills); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RABasic::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)mf.getFunction())->getName() << '\n'); + + MF = &mf; + DEBUG(RMF = &getAnalysis<RenderMachineFunction>()); + + RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + + ReservedRegs = TRI->getReservedRegs(*MF); + + SpillerInstance.reset(createSpiller(*this, *MF, *VRM)); + + allocatePhysRegs(); + + addMBBLiveIns(MF); + + // Diagnostic output before rewriting + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); + + // optional HTML output + DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM)); + + // FIXME: Verification currently must run before VirtRegRewriter. We should + // make the rewriter a separate pass and override verifyAnalysis instead. When + // that happens, verification naturally falls under VerifyMachineCode. +#ifndef NDEBUG + if (VerifyEnabled) { + // Verify accuracy of LiveIntervals. The standard machine code verifier + // ensures that each LiveIntervals covers all uses of the virtual reg. + + // FIXME: MachineVerifier is badly broken when using the standard + // spiller. Always use -spiller=inline with -verify-regalloc. Even with the + // inline spiller, some tests fail to verify because the coalescer does not + // always generate verifiable code. + MF->verify(this, "In RABasic::verify"); + + // Verify that LiveIntervals are partitioned into unions and disjoint within + // the unions. + verify(); + } +#endif // !NDEBUG + + // Run rewriter + VRM->rewrite(LIS->getSlotIndexes()); + + // The pass output is in VirtRegMap. Release all the transient data. + releaseMemory(); + + return true; +} + +FunctionPass* llvm::createBasicRegisterAllocator() +{ + return new RABasic(); +} diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fc150d55e226..15036e38b893 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -48,7 +48,10 @@ namespace { public: static char ID; RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), - isBulkSpilling(false) {} + isBulkSpilling(false) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } private: const TargetMachine *TM; MachineFunction *MF; @@ -259,8 +262,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // instruction, not on the spill. 
bool SpillKill = LR.LastUse != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling %reg" << LRI->first - << " in " << TRI->getName(LR.PhysReg)); + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI) + << " in " << PrintReg(LR.PhysReg, TRI)); const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); int FI = getStackSpaceFor(LRI->first, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); @@ -331,7 +334,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { MO.setIsKill(); return; default: - // The physreg was allocated to a virtual register. That means to value we + // The physreg was allocated to a virtual register. That means the value we // wanted has been clobbered. llvm_unreachable("Instruction uses an allocated register"); } @@ -458,8 +461,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { /// register must not be used for anything else when this is called. /// void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to " - << TRI->getName(PhysReg) << "\n"); + DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to " + << PrintReg(PhysReg, TRI) << "\n"); PhysRegState[PhysReg] = LRE.first; assert(!LRE.second.PhysReg && "Already assigned a physreg"); LRE.second.PhysReg = PhysReg; @@ -503,8 +506,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { return assignVirtToPhysReg(LRE, PhysReg); } - DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName() - << "\n"); + DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " + << RC->getName() << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { @@ -584,8 +587,8 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, allocVirtReg(MI, *LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into " - << TRI->getName(LR.PhysReg) << "\n"); + DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " + << PrintReg(LR.PhysReg, TRI) << "\n"); TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); ++NumLoads; } else if (LR.Dirty) { @@ -653,11 +656,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { if (ThroughRegs.insert(Reg)) - DEBUG(dbgs() << " %reg" << Reg); + DEBUG(dbgs() << ' ' << PrintReg(Reg)); } } @@ -685,7 +689,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { unsigned DefIdx = 0; if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; @@ -731,6 +735,27 @@ void RAFast::handleThroughOperands(MachineInstr *MI, void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); + // FIXME: This should probably be added by instruction selection instead? + // If the last instruction in the block is a return, make sure to mark it as + // using all of the live-out values in the function. 
Things marked both call + // and return are tail calls; do not do this for them. The tail callee need + // not take the same registers as input that it produces as output, and there + // are dependencies for its input registers elsewhere. + if (!MBB->empty() && MBB->back().getDesc().isReturn() && + !MBB->back().getDesc().isCall()) { + MachineInstr *Ret = &MBB->back(); + + for (MachineRegisterInfo::liveout_iterator + I = MF->getRegInfo().liveout_begin(), + E = MF->getRegInfo().liveout_end(); I != E; ++I) { + assert(TargetRegisterInfo::isPhysicalRegister(*I) && + "Cannot have a live-out virtual register."); + + // Add live-out registers as implicit uses. + Ret->addRegisterKilled(*I, TRI, true); + } + } + PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); @@ -761,7 +786,7 @@ void RAFast::AllocateBasicBlock() { dbgs() << "*"; break; default: - dbgs() << "=%reg" << PhysRegState[Reg]; + dbgs() << '=' << PrintReg(PhysRegState[Reg]); if (LiveVirtRegs[PhysRegState[Reg]].Dirty) dbgs() << "*"; assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && @@ -791,16 +816,18 @@ void RAFast::AllocateBasicBlock() { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; LiveDbgValueMap[Reg] = MI; LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); else { int SS = StackSlotForVirtReg[Reg]; - if (SS == -1) + if (SS == -1) { // We can't allocate a physreg for a DebugValue, sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); MO.setReg(0); + } else { // Modify DBG_VALUE now that the value is in a spill slot. int64_t Offset = MI->getOperand(1).getImm(); @@ -817,9 +844,11 @@ void RAFast::AllocateBasicBlock() { MI = NewDV; ScanDbgValue = true; break; - } else + } else { // We can't allocate a physreg for a DebugValue; sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); MO.setReg(0); + } } } } @@ -902,7 +931,7 @@ void RAFast::AllocateBasicBlock() { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); unsigned PhysReg = LRI->second.PhysReg; @@ -1017,8 +1046,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers - unsigned LastVirtReg = MRI->getLastVirtReg(); - StackSlotForVirtReg.grow(LastVirtReg); + StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp new file mode 100644 index 000000000000..c1372cd038cf --- /dev/null +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -0,0 +1,1285 @@ +//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "AllocationOrder.h" +#include "LiveIntervalUnion.h" +#include "LiveRangeEdit.h" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "SpillPlacement.h" +#include "SplitKit.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Function.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineLoopRanges.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +STATISTIC(NumGlobalSplits, "Number of split global live ranges"); +STATISTIC(NumLocalSplits, "Number of split local live ranges"); +STATISTIC(NumReassigned, "Number of interferences reassigned"); +STATISTIC(NumEvicted, "Number of interferences evicted"); + +static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", + createGreedyRegisterAllocator); + +namespace { +class RAGreedy : public MachineFunctionPass, public RegAllocBase { + // context + MachineFunction *MF; + BitVector ReservedRegs; + + // analyses + SlotIndexes *Indexes; + LiveStacks *LS; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineLoopRanges *LoopRanges; + EdgeBundles *Bundles; + SpillPlacement *SpillPlacer; + + // state + std::auto_ptr<Spiller> SpillerInstance; + std::auto_ptr<SplitAnalysis> SA; + + // splitting state. + + /// All basic blocks where the current register is live. + SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints; + + /// For every instruction in SA->UseSlots, store the previous non-copy + /// instruction. + SmallVector<SlotIndex, 8> PrevSlot; + +public: + RAGreedy(); + + /// Return the pass name. + virtual const char* getPassName() const { + return "Greedy Register Allocator"; + } + + /// RAGreedy analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + virtual Spiller &spiller() { return *SpillerInstance; } + + virtual float getPriority(LiveInterval *LI); + + virtual unsigned selectOrSplit(LiveInterval&, + SmallVectorImpl<LiveInterval*>&); + + /// Perform register allocation. 
+ virtual bool runOnMachineFunction(MachineFunction &mf); + + static char ID; + +private: + bool checkUncachedInterference(LiveInterval&, unsigned); + LiveInterval *getSingleInterference(LiveInterval&, unsigned); + bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg); + float calcInterferenceWeight(LiveInterval&, unsigned); + float calcInterferenceInfo(LiveInterval&, unsigned); + float calcGlobalSplitCost(const BitVector&); + void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, + SmallVectorImpl<LiveInterval*>&); + void calcGapWeights(unsigned, SmallVectorImpl<float>&); + SlotIndex getPrevMappedIndex(const MachineInstr*); + void calcPrevSlots(); + unsigned nextSplitPoint(unsigned); + + unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned trySplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned trySpillInterferences(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); +}; +} // end anonymous namespace + +char RAGreedy::ID = 0; + +FunctionPass* llvm::createGreedyRegisterAllocator() { + return new RAGreedy(); +} + +RAGreedy::RAGreedy(): MachineFunctionPass(ID) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); + initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); +} + +void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineLoopRanges>(); + AU.addPreserved<MachineLoopRanges>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addRequired<EdgeBundles>(); + AU.addRequired<SpillPlacement>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RAGreedy::releaseMemory() { + SpillerInstance.reset(0); + RegAllocBase::releaseMemory(); +} + +float RAGreedy::getPriority(LiveInterval *LI) { + float Priority = LI->weight; + + // Prioritize hinted registers so they are allocated first. 
+ std::pair<unsigned, unsigned> Hint; + if (Hint.first || Hint.second) { + // The hint can be target specific, a virtual register, or a physreg. + Priority *= 2; + + // Prefer physreg hints above anything else. + if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second)) + Priority *= 2; + } + return Priority; +} + + +//===----------------------------------------------------------------------===// +// Register Reassignment +//===----------------------------------------------------------------------===// + +// Check interference without using the cache. +bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]); + if (subQ.checkInterference()) + return true; + } + return false; +} + +/// getSingleInterference - Return the single interfering virtual register +/// assigned to PhysReg. Return 0 if more than one virtual register is +/// interfering. +LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + // Check physreg and aliases. + LiveInterval *Interference = 0; + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + if (Q.checkInterference()) { + if (Interference) + return 0; + Q.collectInterferingVRegs(1); + if (!Q.seenAllInterferences()) + return 0; + Interference = Q.interferingVRegs().front(); + } + } + return Interference; +} + +// Attempt to reassign this virtual register to a different physical register. +// +// FIXME: we are not yet caching these "second-level" interferences discovered +// in the sub-queries. These interferences can change with each call to +// selectOrSplit. However, we could implement a "may-interfere" cache that +// could be conservatively dirtied when we reassign or split. +// +// FIXME: This may result in a lot of alias queries. We could summarize alias +// live intervals in their parent register's live union, but it's messy. +bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, + unsigned WantedPhysReg) { + assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) && + "Can only reassign virtual registers"); + assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) && + "inconsistent phys reg assigment"); + + AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs); + while (unsigned PhysReg = Order.next()) { + // Don't reassign to a WantedPhysReg alias. + if (TRI->regsOverlap(PhysReg, WantedPhysReg)) + continue; + + if (checkUncachedInterference(InterferingVReg, PhysReg)) + continue; + + // Reassign the interfering virtual reg to this physical reg. + unsigned OldAssign = VRM->getPhys(InterferingVReg.reg); + DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " << + TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n'); + unassign(InterferingVReg, OldAssign); + assign(InterferingVReg, PhysReg); + ++NumReassigned; + return true; + } + return false; +} + +/// tryReassignOrEvict - Try to reassign a single interferences to a different +/// physreg, or evict a single interference with a lower spill weight. +/// @param VirtReg Currently unassigned virtual register. +/// @param Order Physregs to try. +/// @return Physreg to assign VirtReg, or 0. 
+unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs){ + NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); + + // Keep track of the lightest single interference seen so far. + float BestWeight = VirtReg.weight; + LiveInterval *BestVirt = 0; + unsigned BestPhys = 0; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); + if (!InterferingVReg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg)) + continue; + if (reassignVReg(*InterferingVReg, PhysReg)) + return PhysReg; + + // Cannot reassign, is this an eviction candidate? + if (InterferingVReg->weight < BestWeight) { + BestVirt = InterferingVReg; + BestPhys = PhysReg; + BestWeight = InterferingVReg->weight; + } + } + + // Nothing reassigned, can we evict a lighter single interference? + if (BestVirt) { + DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n'); + unassign(*BestVirt, VRM->getPhys(BestVirt->reg)); + ++NumEvicted; + NewVRegs.push_back(BestVirt); + return BestPhys; + } + + return 0; +} + + +//===----------------------------------------------------------------------===// +// Region Splitting +//===----------------------------------------------------------------------===// + +/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints +/// when considering interference from PhysReg. Also compute an optimistic local +/// cost of this interference pattern. +/// +/// The final cost of a split is the local cost + global cost of preferences +/// broken by SpillPlacement. +/// +float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { + // Reset interference dependent info. + SpillConstraints.resize(SA->LiveBlocks.size()); + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + BC.Number = BI.MBB->getNumber(); + BC.Entry = (BI.Uses && BI.LiveIn) ? + SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = (BI.Uses && BI.LiveOut) ? + SpillPlacement::PrefReg : SpillPlacement::DontCare; + BI.OverlapEntry = BI.OverlapExit = false; + } + + // Add interference info from each PhysReg alias. + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(VirtReg, *AI).checkInterference()) + continue; + LiveIntervalUnion::SegmentIter IntI = + PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); + if (!IntI.valid()) + continue; + + // Determine which blocks have interference live in or after the last split + // point. + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + + // Skip interference-free blocks. + if (IntI.start() >= Stop) + continue; + + // Is the interference live-in? + if (BI.LiveIn) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + if (IntI.start() <= Start) + BC.Entry = SpillPlacement::MustSpill; + } + + // Is the interference overlapping the last split point? + if (BI.LiveOut) { + if (IntI.stop() < BI.LastSplitPoint) + IntI.advanceTo(BI.LastSplitPoint.getPrevSlot()); + if (!IntI.valid()) + break; + if (IntI.start() < Stop) + BC.Exit = SpillPlacement::MustSpill; + } + } + + // Rewind iterator and check other interferences. 
+ IntI.find(VirtReg.beginIndex()); + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + + // Skip interference-free blocks. + if (IntI.start() >= Stop) + continue; + + // Handle transparent blocks with interference separately. + // Transparent blocks never incur any fixed cost. + if (BI.LiveThrough && !BI.Uses) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + if (IntI.start() >= Stop) + continue; + + if (BC.Entry != SpillPlacement::MustSpill) + BC.Entry = SpillPlacement::PrefSpill; + if (BC.Exit != SpillPlacement::MustSpill) + BC.Exit = SpillPlacement::PrefSpill; + continue; + } + + // Now we only have blocks with uses left. + // Check if the interference overlaps the uses. + assert(BI.Uses && "Non-transparent block without any uses"); + + // Check interference on entry. + if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + // Not live in, but before the first use. + if (IntI.start() < BI.FirstUse) + BC.Entry = SpillPlacement::PrefSpill; + } + + // Does interference overlap the uses in the entry segment + // [FirstUse;Kill)? + if (BI.LiveIn && !BI.OverlapEntry) { + IntI.advanceTo(BI.FirstUse); + if (!IntI.valid()) + break; + // A live-through interval has no kill. + // Check [FirstUse;LastUse) instead. + if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + BI.OverlapEntry = true; + } + + // Does interference overlap the uses in the exit segment [Def;LastUse)? + if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) { + IntI.advanceTo(BI.Def); + if (!IntI.valid()) + break; + if (IntI.start() < BI.LastUse) + BI.OverlapExit = true; + } + + // Check interference on exit. + if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) { + // Check interference between LastUse and Stop. + if (BC.Exit != SpillPlacement::PrefSpill) { + IntI.advanceTo(BI.LastUse); + if (!IntI.valid()) + break; + if (IntI.start() < Stop) + BC.Exit = SpillPlacement::PrefSpill; + } + } + } + } + + // Accumulate a local cost of this interference pattern. + float LocalCost = 0; + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + if (!BI.Uses) + continue; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + unsigned Inserts = 0; + + // Do we need spill code for the entry segment? + if (BI.LiveIn) + Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg; + + // For the exit segment? + if (BI.LiveOut) + Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg; + + // The local cost of spill code in this block is the block frequency times + // the number of spill instructions inserted. + if (Inserts) + LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB); + } + DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = " + << LocalCost << '\n'); + return LocalCost; +} + +/// calcGlobalSplitCost - Return the global split cost of following the split +/// pattern in LiveBundles. This cost should be added to the local cost of the +/// interference pattern in SpillConstraints. 
+/// +float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { + float GlobalCost = 0; + for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) { + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + unsigned Inserts = 0; + // Broken entry preference? + Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] != + (BC.Entry == SpillPlacement::PrefReg); + // Broken exit preference? + Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] != + (BC.Exit == SpillPlacement::PrefReg); + if (Inserts) + GlobalCost += + Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB); + } + DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n'); + return GlobalCost; +} + +/// splitAroundRegion - Split VirtReg around the region determined by +/// LiveBundles. Make an effort to avoid interference from PhysReg. +/// +/// The 'register' interval is going to contain as many uses as possible while +/// avoiding interference. The 'stack' interval is the complement constructed by +/// SplitEditor. It will contain the rest. +/// +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, + const BitVector &LiveBundles, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + DEBUG({ + dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + << " with bundles"; + for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + + // First compute interference ranges in the live blocks. + typedef std::pair<SlotIndex, SlotIndex> IndexPair; + SmallVector<IndexPair, 8> InterferenceRanges; + InterferenceRanges.resize(SA->LiveBlocks.size()); + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(VirtReg, *AI).checkInterference()) + continue; + LiveIntervalUnion::SegmentIter IntI = + PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); + if (!IntI.valid()) + continue; + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + IndexPair &IP = InterferenceRanges[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + // Skip interference-free blocks. + if (IntI.start() >= Stop) + continue; + + // First interference in block. + if (BI.LiveIn) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + if (IntI.start() >= Stop) + continue; + if (!IP.first.isValid() || IntI.start() < IP.first) + IP.first = IntI.start(); + } + + // Last interference in block. + if (BI.LiveOut) { + IntI.advanceTo(Stop); + if (!IntI.valid() || IntI.start() >= Stop) + --IntI; + if (IntI.stop() <= Start) + continue; + if (!IP.second.isValid() || IntI.stop() > IP.second) + IP.second = IntI.stop(); + } + } + } + + SmallVector<LiveInterval*, 4> SpillRegs; + LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); + SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + + // Create the main cross-block interval. + SE.openIntv(); + + // First add all defs that are live out of a block. + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + + // Should the register be live out? 
+    if (!BI.LiveOut || !RegOut)
+      continue;
+
+    IndexPair &IP = InterferenceRanges[i];
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+    DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
+                 << Bundles->getBundle(BI.MBB->getNumber(), 1)
+                 << " intf [" << IP.first << ';' << IP.second << ')');
+
+    // The interference interval should either be invalid or overlap MBB.
+    assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference");
+    assert((!IP.second.isValid() || IP.second > Start) && "Bad interference");
+
+    // Check interference leaving the block.
+    if (!IP.second.isValid()) {
+      // Block is interference-free.
+      DEBUG(dbgs() << ", no interference");
+      if (!BI.Uses) {
+        assert(BI.LiveThrough && "No uses, but not live through block?");
+        // Block is live-through without interference.
+        DEBUG(dbgs() << ", no uses"
+                     << (RegIn ? ", live-through.\n" : ", stack in.\n"));
+        if (!RegIn)
+          SE.enterIntvAtEnd(*BI.MBB);
+        continue;
+      }
+      if (!BI.LiveThrough) {
+        DEBUG(dbgs() << ", not live-through.\n");
+        SE.useIntv(SE.enterIntvBefore(BI.Def), Stop);
+        continue;
+      }
+      if (!RegIn) {
+        // Block is live-through, but entry bundle is on the stack.
+        // Reload just before the first use.
+        DEBUG(dbgs() << ", not live-in, enter before first use.\n");
+        SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop);
+        continue;
+      }
+      DEBUG(dbgs() << ", live-through.\n");
+      continue;
+    }
+
+    // Block has interference.
+    DEBUG(dbgs() << ", interference to " << IP.second);
+
+    if (!BI.LiveThrough && IP.second <= BI.Def) {
+      // The interference doesn't reach the outgoing segment.
+      DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
+      SE.useIntv(BI.Def, Stop);
+      continue;
+    }
+
+
+    if (!BI.Uses) {
+      // No uses in block, avoid interference by reloading as late as possible.
+      DEBUG(dbgs() << ", no uses.\n");
+      SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+      assert(SegStart >= IP.second && "Couldn't avoid interference");
+      continue;
+    }
+
+    if (IP.second.getBoundaryIndex() < BI.LastUse) {
+      // There are interference-free uses at the end of the block.
+      // Find the first use that can get the live-out register.
+      SmallVectorImpl<SlotIndex>::const_iterator UI =
+        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+                         IP.second.getBoundaryIndex());
+      assert(UI != SA->UseSlots.end() && "Couldn't find last use");
+      SlotIndex Use = *UI;
+      assert(Use <= BI.LastUse && "Couldn't find last use");
+      // Only attempt a split before the last split point.
+      if (Use.getBaseIndex() <= BI.LastSplitPoint) {
+        DEBUG(dbgs() << ", free use at " << Use << ".\n");
+        SlotIndex SegStart = SE.enterIntvBefore(Use);
+        assert(SegStart >= IP.second && "Couldn't avoid interference");
+        assert(SegStart < BI.LastSplitPoint && "Impossible split point");
+        SE.useIntv(SegStart, Stop);
+        continue;
+      }
+    }
+
+    // Interference is after the last use.
+    DEBUG(dbgs() << " after last use.\n");
+    SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+    assert(SegStart >= IP.second && "Couldn't avoid interference");
+  }
+
+  // Now all defs leading to live bundles are handled, do everything else.
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+    // Is the register live-in?
+    if (!BI.LiveIn || !RegIn)
+      continue;
+
+    // We have an incoming register. Check for interference.
+ IndexPair &IP = InterferenceRanges[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) + << " -> BB#" << BI.MBB->getNumber()); + + // Check interference entering the block. + if (!IP.first.isValid()) { + // Block is interference-free. + DEBUG(dbgs() << ", no interference"); + if (!BI.Uses) { + assert(BI.LiveThrough && "No uses, but not live through block?"); + // Block is live-through without interference. + if (RegOut) { + DEBUG(dbgs() << ", no uses, live-through.\n"); + SE.useIntv(Start, Stop); + } else { + DEBUG(dbgs() << ", no uses, stack-out.\n"); + SE.leaveIntvAtTop(*BI.MBB); + } + continue; + } + if (!BI.LiveThrough) { + DEBUG(dbgs() << ", killed in block.\n"); + SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill)); + continue; + } + if (!RegOut) { + // Block is live-through, but exit bundle is on the stack. + // Spill immediately after the last use. + if (BI.LastUse < BI.LastSplitPoint) { + DEBUG(dbgs() << ", uses, stack-out.\n"); + SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse)); + continue; + } + // The last use is after the last split point, it is probably an + // indirect jump. + DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " + << BI.LastSplitPoint << ", stack-out.\n"); + SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint); + SE.useIntv(Start, SegEnd); + // Run a double interval from the split to the last use. + // This makes it possible to spill the complement without affecting the + // indirect branch. + SE.overlapIntv(SegEnd, BI.LastUse); + continue; + } + // Register is live-through. + DEBUG(dbgs() << ", uses, live-through.\n"); + SE.useIntv(Start, Stop); + continue; + } + + // Block has interference. + DEBUG(dbgs() << ", interference from " << IP.first); + + if (!BI.LiveThrough && IP.first >= BI.Kill) { + // The interference doesn't reach the outgoing segment. + DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); + SE.useIntv(Start, BI.Kill); + continue; + } + + if (!BI.Uses) { + // No uses in block, avoid interference by spilling as soon as possible. + DEBUG(dbgs() << ", no uses.\n"); + SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + continue; + } + if (IP.first.getBaseIndex() > BI.FirstUse) { + // There are interference-free uses at the beginning of the block. + // Find the last use that can get the register. + SmallVectorImpl<SlotIndex>::const_iterator UI = + std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), + IP.first.getBaseIndex()); + assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); + SlotIndex Use = (--UI)->getBoundaryIndex(); + DEBUG(dbgs() << ", free use at " << *UI << ".\n"); + SlotIndex SegEnd = SE.leaveIntvAfter(Use); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + SE.useIntv(Start, SegEnd); + continue; + } + + // Interference is before the first use. + DEBUG(dbgs() << " before first use.\n"); + SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + } + + SE.closeIntv(); + + // FIXME: Should we be more aggressive about splitting the stack region into + // per-block segments? The current approach allows the stack region to + // separate into connected components. Some components may be allocatable. 
+ SE.finish(); + ++NumGlobalSplits; + + if (VerifyEnabled) { + MF->verify(this, "After splitting live range around region"); + +#ifndef NDEBUG + // Make sure that at least one of the new intervals can allocate to PhysReg. + // That was the whole point of splitting the live range. + bool found = false; + for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E; + ++I) + if (!checkUncachedInterference(**I, PhysReg)) { + found = true; + break; + } + assert(found && "No allocatable intervals after pointless splitting"); +#endif + } +} + +unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + BitVector LiveBundles, BestBundles; + float BestCost = 0; + unsigned BestReg = 0; + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + float Cost = calcInterferenceInfo(VirtReg, PhysReg); + if (BestReg && Cost >= BestCost) + continue; + + SpillPlacer->placeSpills(SpillConstraints, LiveBundles); + // No live bundles, defer to splitSingleBlocks(). + if (!LiveBundles.any()) + continue; + + Cost += calcGlobalSplitCost(LiveBundles); + if (!BestReg || Cost < BestCost) { + BestReg = PhysReg; + BestCost = Cost; + BestBundles.swap(LiveBundles); + } + } + + if (!BestReg) + return 0; + + splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Local Splitting +//===----------------------------------------------------------------------===// + + +/// calcGapWeights - Compute the maximum spill weight that needs to be evicted +/// in order to use PhysReg between two entries in SA->UseSlots. +/// +/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. +/// +void RAGreedy::calcGapWeights(unsigned PhysReg, + SmallVectorImpl<float> &GapWeight) { + assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + const unsigned NumGaps = Uses.size()-1; + + // Start and end points for the interference check. + SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse; + SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse; + + GapWeight.assign(NumGaps, 0.0f); + + // Add interference from each overlapping register. + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI) + .checkInterference()) + continue; + + // We know that VirtReg is a continuous interval from FirstUse to LastUse, + // so we don't need InterferenceQuery. + // + // Interference that overlaps an instruction is counted in both gaps + // surrounding the instruction. The exception is interference before + // StartIdx and after StopIdx. + // + LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { + // Skip the gaps before IntI. + while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + // Update the gaps covered by IntI. 
+ const float weight = IntI.value()->weight; + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = std::max(GapWeight[Gap], weight); + if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) + break; + } + if (Gap == NumGaps) + break; + } + } +} + +/// getPrevMappedIndex - Return the slot index of the last non-copy instruction +/// before MI that has a slot index. If MI is the first mapped instruction in +/// its block, return the block start index instead. +/// +SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) { + assert(MI && "Missing MachineInstr"); + const MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::const_iterator B = MBB->begin(), I = MI; + while (I != B) + if (!(--I)->isDebugValue() && !I->isCopy()) + return Indexes->getInstructionIndex(I); + return Indexes->getMBBStartIdx(MBB); +} + +/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous +/// real non-copy instruction for each instruction in SA->UseSlots. +/// +void RAGreedy::calcPrevSlots() { + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + PrevSlot.clear(); + PrevSlot.reserve(Uses.size()); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]); + PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex()); + } +} + +/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may +/// be beneficial to split before UseSlots[i]. +/// +/// 0 is always a valid split point +unsigned RAGreedy::nextSplitPoint(unsigned i) { + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + const unsigned Size = Uses.size(); + assert(i != Size && "No split points after the end"); + // Allow split before i when Uses[i] is not adjacent to the previous use. + while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex()) + ; + return i; +} + +/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only +/// basic block. +/// +unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + + // Note that it is possible to have an interval that is live-in or live-out + // while only covering a single block - A phi-def can use undef values from + // predecessors, and the block could be a single-block loop. + // We don't bother doing anything clever about such a case, we simply assume + // that the interval is continuous from FirstUse to LastUse. We should make + // sure that we don't do anything illegal to such an interval, though. + + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + if (Uses.size() <= 2) + return 0; + const unsigned NumGaps = Uses.size()-1; + + DEBUG({ + dbgs() << "tryLocalSplit: "; + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << '\n'; + }); + + // For every use, find the previous mapped non-copy instruction. + // We use this to detect valid split points, and to estimate new interval + // sizes. + calcPrevSlots(); + + unsigned BestBefore = NumGaps; + unsigned BestAfter = 0; + float BestDiff = 0; + + const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB); + SmallVector<float, 8> GapWeight; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + // Keep track of the largest spill weight that would need to be evicted in + // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. 
+ calcGapWeights(PhysReg, GapWeight); + + // Try to find the best sequence of gaps to close. + // The new spill weight must be larger than any gap interference. + + // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. + unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1; + + // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). + // It is the spill weight that needs to be evicted. + float MaxGap = GapWeight[0]; + for (unsigned i = 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + + for (;;) { + // Live before/after split? + const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; + const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' + << Uses[SplitBefore] << '-' << Uses[SplitAfter] + << " i=" << MaxGap); + + // Stop before the interval gets so big we wouldn't be making progress. + if (!LiveBefore && !LiveAfter) { + DEBUG(dbgs() << " all\n"); + break; + } + // Should the interval be extended or shrunk? + bool Shrink = true; + if (MaxGap < HUGE_VALF) { + // Estimate the new spill weight. + // + // Each instruction reads and writes the register, except the first + // instr doesn't read when !FirstLive, and the last instr doesn't write + // when !LastLive. + // + // We will be inserting copies before and after, so the total number of + // reads and writes is 2 * EstUses. + // + const unsigned EstUses = 2*(SplitAfter - SplitBefore) + + 2*(LiveBefore + LiveAfter); + + // Try to guess the size of the new interval. This should be trivial, + // but the slot index of an inserted copy can be a lot smaller than the + // instruction it is inserted before if there are many dead indexes + // between them. + // + // We measure the distance from the instruction before SplitBefore to + // get a conservative estimate. + // + // The final distance can still be different if inserting copies + // triggers a slot index renumbering. + // + const float EstWeight = normalizeSpillWeight(blockFreq * EstUses, + PrevSlot[SplitBefore].distance(Uses[SplitAfter])); + // Would this split be possible to allocate? + // Never allocate all gaps, we wouldn't be making progress. + float Diff = EstWeight - MaxGap; + DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); + if (Diff > 0) { + Shrink = false; + if (Diff > BestDiff) { + DEBUG(dbgs() << " (best)"); + BestDiff = Diff; + BestBefore = SplitBefore; + BestAfter = SplitAfter; + } + } + } + + // Try to shrink. + if (Shrink) { + SplitBefore = nextSplitPoint(SplitBefore); + if (SplitBefore < SplitAfter) { + DEBUG(dbgs() << " shrink\n"); + // Recompute the max when necessary. + if (GapWeight[SplitBefore - 1] >= MaxGap) { + MaxGap = GapWeight[SplitBefore]; + for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + } + continue; + } + MaxGap = 0; + } + + // Try to extend the interval. + if (SplitAfter >= NumGaps) { + DEBUG(dbgs() << " end\n"); + break; + } + + DEBUG(dbgs() << " extend\n"); + for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1; + SplitAfter != e; ++SplitAfter) + MaxGap = std::max(MaxGap, GapWeight[SplitAfter]); + continue; + } + } + + // Didn't find any candidates? 
+ if (BestBefore == NumGaps) + return 0; + + DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] + << '-' << Uses[BestAfter] << ", " << BestDiff + << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); + + SmallVector<LiveInterval*, 4> SpillRegs; + LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); + SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + + SE.openIntv(); + SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]); + SE.useIntv(SegStart, SegStop); + SE.closeIntv(); + SE.finish(); + ++NumLocalSplits; + + return 0; +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +/// trySplit - Try to split VirtReg or one of its interferences, making it +/// assignable. +/// @return Physreg when VirtReg may be assigned and/or new NewVRegs. +unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*>&NewVRegs) { + SA->analyze(&VirtReg); + + // Local intervals are handled separately. + if (LIS->intervalIsInOneMBB(VirtReg)) { + NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + return tryLocalSplit(VirtReg, Order, NewVRegs); + } + + NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + + // First try to split around a region spanning multiple blocks. + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + + // Then isolate blocks with multiple uses. + SplitAnalysis::BlockPtrSet Blocks; + if (SA->getMultiUseBlocks(Blocks)) { + SmallVector<LiveInterval*, 4> SpillRegs; + LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); + SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks); + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + } + + // Don't assign any physregs. + return 0; +} + + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// calcInterferenceWeight - Calculate the combined spill weight of +/// interferences when assigning VirtReg to PhysReg. +float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){ + float Sum = 0; + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AI); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) + return HUGE_VALF; + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) + Sum += Q.interferingVRegs()[i]->weight; + } + return Sum; +} + +/// trySpillInterferences - Try to spill interfering registers instead of the +/// current one. Only do it if the accumulated spill weight is smaller than the +/// current spill weight. +unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled); + unsigned BestPhys = 0; + float BestWeight = 0; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + float Weight = calcInterferenceWeight(VirtReg, PhysReg); + if (Weight == HUGE_VALF || Weight >= VirtReg.weight) + continue; + if (!BestPhys || Weight < BestWeight) + BestPhys = PhysReg, BestWeight = Weight; + } + + // No candidates found. 
+ if (!BestPhys) + return 0; + + // Collect all interfering registers. + SmallVector<LiveInterval*, 8> Spills; + for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AI); + Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end()); + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *VReg = Q.interferingVRegs()[i]; + unassign(*VReg, *AI); + } + } + + // Spill them all. + DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight " + << BestWeight << '\n'); + for (unsigned i = 0, e = Spills.size(); i != e; ++i) + spiller().spill(Spills[i], NewVRegs, Spills); + return BestPhys; +} + + +//===----------------------------------------------------------------------===// +// Main Entry Point +//===----------------------------------------------------------------------===// + +unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + // First try assigning a free register. + AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); + while (unsigned PhysReg = Order.next()) { + if (!checkPhysRegInterference(VirtReg, PhysReg)) + return PhysReg; + } + + // Try to reassign interferences. + if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs)) + return PhysReg; + + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + + // Try splitting VirtReg or interferences. + unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + + // Try to spill another interfering reg with less spill weight. + PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs); + if (PhysReg) + return PhysReg; + + // Finally spill VirtReg itself. + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + SmallVector<LiveInterval*, 1> pendingSpills; + spiller().spill(&VirtReg, NewVRegs, pendingSpills); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)mf.getFunction())->getName() << '\n'); + + MF = &mf; + if (VerifyEnabled) + MF->verify(this, "Before greedy register allocator"); + + RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + Indexes = &getAnalysis<SlotIndexes>(); + DomTree = &getAnalysis<MachineDominatorTree>(); + ReservedRegs = TRI->getReservedRegs(*MF); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); + Loops = &getAnalysis<MachineLoopInfo>(); + LoopRanges = &getAnalysis<MachineLoopRanges>(); + Bundles = &getAnalysis<EdgeBundles>(); + SpillPlacer = &getAnalysis<SpillPlacement>(); + + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + + allocatePhysRegs(); + addMBBLiveIns(MF); + LIS->addKillFlags(); + + // Run rewriter + { + NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); + VRM->rewrite(Indexes); + } + + // The pass output is in VirtRegMap. Release all the transient data. 
+ releaseMemory(); + + return true; +} diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 5c62354a8872..b959878bcdba 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -12,13 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "LiveDebugVariables.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "Spiller.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -91,6 +92,19 @@ namespace { struct RALinScan : public MachineFunctionPass { static char ID; RALinScan() : MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup( + *PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + // Initialize the queue to record recently-used registers. if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); @@ -127,7 +141,6 @@ namespace { BitVector allocatableRegs_; BitVector reservedRegs_; LiveIntervals* li_; - LiveStacks* ls_; MachineLoopInfo *loopInfo; /// handled_ - Intervals are added to the handled_ set in the order of their @@ -183,6 +196,8 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); if (StrongPHIElim) @@ -193,12 +208,15 @@ namespace { AU.addRequired<CalculateSpillWeights>(); if (PreSplitIntervals) AU.addRequiredID(PreAllocSplittingID); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); + AU.addRequiredID(LiveStacksID); + AU.addPreservedID(LiveStacksID); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -370,8 +388,19 @@ namespace { char RALinScan::ID = 0; } -INITIALIZE_PASS(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false); +INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) +INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 
+INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -402,8 +431,12 @@ void RALinScan::ComputeRelatedRegClasses() { for (DenseMap<unsigned, const TargetRegisterClass*>::iterator I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) - RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); + for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { + const TargetRegisterClass *AliasClass = + OneClassForEachPhysReg.lookup(*AS); + if (AliasClass) + RelatedRegClasses.unionSets(I->second, AliasClass); + } } /// attemptTrivialCoalescing - If a simple interval is defined by a copy, try @@ -431,8 +464,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned CandReg; { MachineInstr *CopyMI; - if (vni->def != SlotIndex() && vni->isDefAccurate() && - (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) + if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) // Defined by a copy, try to extend SrcReg forward CandReg = CopyMI->getOperand(1).getReg(); else if (TrivCoalesceEnds && @@ -442,6 +474,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { CandReg = CopyMI->getOperand(0).getReg(); else return Reg; + + // If the target of the copy is a sub-register then don't coalesce. + if(CopyMI->getOperand(0).getSubReg()) + return Reg; } if (TargetRegisterInfo::isVirtualRegister(CandReg)) { @@ -478,7 +514,6 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { allocatableRegs_ = tri_->getAllocatableSet(fn); reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); - ls_ = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); // We don't run the coalescer here because we have no reason to @@ -505,6 +540,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { // Rewrite spill code and update the PhysRegsUsed set. rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_); + assert(unhandled_.empty() && "Unhandled live intervals remain!"); finalizeRegUses(); @@ -638,8 +676,6 @@ void RALinScan::linearScan() { // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. - if (ls_->getNumIntervals() == 0) - return; if (!vrm_->FindUnusedRegisters(li_)) return; } @@ -784,30 +820,6 @@ static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, } } -/// addStackInterval - Create a LiveInterval for stack if the specified live -/// interval has been spilled. 
-static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, - LiveIntervals *li_, - MachineRegisterInfo* mri_, VirtRegMap &vrm_) { - int SS = vrm_.getStackSlot(cur->reg); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - LiveInterval &SI = ls_->getOrCreateInterval(SS, RC); - - VNInfo *VNI; - if (SI.hasAtLeastOneValue()) - VNI = SI.getValNumInfo(0); - else - VNI = SI.getNextValue(SlotIndex(), 0, false, - ls_->getVNInfoAllocator()); - - LiveInterval &RI = li_->getInterval(cur->reg); - // FIXME: This may be overly conservative. - SI.MergeRangesInAsValue(RI, VNI); -} - /// getConflictWeight - Return the number of conflicts between cur /// live interval and defs and uses of Reg weighted by loop depthes. static @@ -925,13 +937,9 @@ LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { } void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - bool isNew = DowngradedRegs.insert(Reg); - isNew = isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, Reg)); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) { - isNew = DowngradedRegs.insert(*AS); - isNew = isNew; // Silence compiler warning. + for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { + bool isNew = DowngradedRegs.insert(*AS); + (void)isNew; // Silence compiler warning. assert(isNew && "Multiple reloads holding the same register?"); DowngradeMap.insert(std::make_pair(li->reg, *AS)); } @@ -957,10 +965,11 @@ namespace { /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - DEBUG(dbgs() << "\tallocating current interval: "); + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + DEBUG(dbgs() << "\tallocating current interval from " + << RC->getName() << ": "); // This is an implicitly defined live interval, just assign any register. - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); if (cur->empty()) { unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) @@ -984,8 +993,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // one, e.g. X86::mov32to32_. These move instructions are not coalescable. if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if ((vni->def != SlotIndex()) && !vni->isUnused() && - vni->isDefAccurate()) { + if (!vni->isUnused()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); if (CopyMI && CopyMI->isCopy()) { unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); @@ -1225,7 +1233,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); - addStackInterval(cur, ls_, li_, mri_, *vrm_); if (added.empty()) return; // Early exit if all spills were folded. @@ -1300,7 +1307,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); spiller_->spill(sli, added, spillIs); - addStackInterval(sli, ls_, li_, mri_, *vrm_); spilled.insert(sli->reg); } @@ -1419,8 +1425,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg); // Resolve second part of the hint (if possible) given the current allocation. 
unsigned physReg = Hint.second; - if (physReg && - TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) + if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) physReg = vrm_->getPhys(physReg); TargetRegisterClass::iterator I, E; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 61f337bab49c..ea0d1fe0233f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,9 +31,6 @@ #define DEBUG_TYPE "regalloc" -#include "PBQP/HeuristicSolver.h" -#include "PBQP/Graph.h" -#include "PBQP/Heuristics/Briggs.h" #include "RenderMachineFunction.h" #include "Splitter.h" #include "VirtRegMap.h" @@ -41,9 +38,13 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PBQP/HeuristicSolver.h" +#include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" @@ -51,7 +52,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <limits> -#include <map> #include <memory> #include <set> #include <vector> @@ -60,7 +60,7 @@ using namespace llvm; static RegisterRegAlloc registerPBQPRepAlloc("pbqp", "PBQP register allocator", - llvm::createPBQPRegisterAllocator); + createDefaultPBQPRegisterAllocator); static cl::opt<bool> pbqpCoalescing("pbqp-coalescing", @@ -69,698 +69,471 @@ pbqpCoalescing("pbqp-coalescing", static cl::opt<bool> pbqpPreSplitting("pbqp-pre-splitting", - cl::desc("Pre-splite before PBQP register allocation."), + cl::desc("Pre-split before PBQP register allocation."), cl::init(false), cl::Hidden); namespace { - /// - /// PBQP based allocators solve the register allocation problem by mapping - /// register allocation problems to Partitioned Boolean Quadratic - /// Programming problems. - class PBQPRegAlloc : public MachineFunctionPass { - public: +/// +/// PBQP based allocators solve the register allocation problem by mapping +/// register allocation problems to Partitioned Boolean Quadratic +/// Programming problems. +class RegAllocPBQP : public MachineFunctionPass { +public: + + static char ID; + + /// Construct a PBQP register allocator. + RegAllocPBQP(std::auto_ptr<PBQPBuilder> b) + : MachineFunctionPass(ID), builder(b) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + } - static char ID; + /// Return the pass name. + virtual const char* getPassName() const { + return "PBQP Register Allocator"; + } - /// Construct a PBQP register allocator. - PBQPRegAlloc() : MachineFunctionPass(ID) {} + /// PBQP analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &au) const; - /// Return the pass name. 
- virtual const char* getPassName() const { - return "PBQP Register Allocator"; - } + /// Perform register allocation + virtual bool runOnMachineFunction(MachineFunction &MF); - /// PBQP analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<SlotIndexes>(); - au.addPreserved<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - //au.addRequiredID(SplitCriticalEdgesID); - au.addRequired<RegisterCoalescer>(); - au.addRequired<CalculateSpillWeights>(); - au.addRequired<LiveStacks>(); - au.addPreserved<LiveStacks>(); - au.addRequired<MachineLoopInfo>(); - au.addPreserved<MachineLoopInfo>(); - if (pbqpPreSplitting) - au.addRequired<LoopSplitter>(); - au.addRequired<VirtRegMap>(); - au.addRequired<RenderMachineFunction>(); - MachineFunctionPass::getAnalysisUsage(au); - } +private: - /// Perform register allocation - virtual bool runOnMachineFunction(MachineFunction &MF); + typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; + typedef std::vector<const LiveInterval*> Node2LIMap; + typedef std::vector<unsigned> AllowedSet; + typedef std::vector<AllowedSet> AllowedSetMap; + typedef std::pair<unsigned, unsigned> RegPair; + typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; + typedef std::vector<PBQP::Graph::NodeItr> NodeVector; + typedef std::set<unsigned> RegSet; - private: - class LIOrdering { - public: - bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { - return li1->reg < li2->reg; - } - }; - - typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap; - typedef std::vector<const LiveInterval*> Node2LIMap; - typedef std::vector<unsigned> AllowedSet; - typedef std::vector<AllowedSet> AllowedSetMap; - typedef std::set<unsigned> RegSet; - typedef std::pair<unsigned, unsigned> RegPair; - typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; - - typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet; - - typedef std::vector<PBQP::Graph::NodeItr> NodeVector; - - MachineFunction *mf; - const TargetMachine *tm; - const TargetRegisterInfo *tri; - const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; - MachineRegisterInfo *mri; - RenderMachineFunction *rmf; - - LiveIntervals *lis; - LiveStacks *lss; - VirtRegMap *vrm; - - LI2NodeMap li2Node; - Node2LIMap node2LI; - AllowedSetMap allowedSets; - LiveIntervalSet vregIntervalsToAlloc, - emptyVRegIntervals; - NodeVector problemNodes; - - - /// Builds a PBQP cost vector. - template <typename RegContainer> - PBQP::Vector buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &cealesces, - PBQP::PBQPNum spillCost) const; - - /// \brief Builds a PBQP interference matrix. - /// - /// @return Either a pointer to a non-zero PBQP matrix representing the - /// allocation option costs, or a null pointer for a zero matrix. - /// - /// Expects allowed sets for two interfering LiveIntervals. These allowed - /// sets should contain only allocable registers from the LiveInterval's - /// register class, with any interfering pre-colored registers removed. - template <typename RegContainer> - PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1, - const RegContainer &allowed2) const; - - /// - /// Expects allowed sets for two potentially coalescable LiveIntervals, - /// and an estimated benefit due to coalescing. The allowed sets should - /// contain only allocable registers from the LiveInterval's register - /// classes, with any interfering pre-colored registers removed. 
- template <typename RegContainer> - PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1, - const RegContainer &allowed2, - PBQP::PBQPNum cBenefit) const; - - /// \brief Finds coalescing opportunities and returns them as a map. - /// - /// Any entries in the map are guaranteed coalescable, even if their - /// corresponding live intervals overlap. - CoalesceMap findCoalesces(); - - /// \brief Finds the initial set of vreg intervals to allocate. - void findVRegIntervalsToAlloc(); - - /// \brief Constructs a PBQP problem representation of the register - /// allocation problem for this function. - /// - /// @return a PBQP solver object for the register allocation problem. - PBQP::Graph constructPBQPProblem(); - - /// \brief Adds a stack interval if the given live interval has been - /// spilled. Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - - /// \brief Given a solved PBQP problem maps this solution back to a register - /// assignment. - bool mapPBQPToRegAlloc(const PBQP::Solution &solution); - - /// \brief Postprocessing before final spilling. Sets basic block "live in" - /// variables. - void finalizeAlloc() const; - - }; - - char PBQPRegAlloc::ID = 0; -} + std::auto_ptr<PBQPBuilder> builder; + MachineFunction *mf; + const TargetMachine *tm; + const TargetRegisterInfo *tri; + const TargetInstrInfo *tii; + const MachineLoopInfo *loopInfo; + MachineRegisterInfo *mri; + RenderMachineFunction *rmf; -template <typename RegContainer> -PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &coalesces, - PBQP::PBQPNum spillCost) const { + LiveIntervals *lis; + LiveStacks *lss; + VirtRegMap *vrm; - typedef typename RegContainer::const_iterator AllowedItr; + RegSet vregsToAlloc, emptyIntervalVRegs; - // Allocate vector. Additional element (0th) used for spill option - PBQP::Vector v(allowed.size() + 1, 0); + /// \brief Finds the initial set of vreg intervals to allocate. + void findVRegIntervalsToAlloc(); - v[0] = spillCost; + /// \brief Adds a stack interval if the given live interval has been + /// spilled. Used to support stack slot coloring. + void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - // Iterate over the allowed registers inserting coalesce benefits if there - // are any. - unsigned ai = 0; - for (AllowedItr itr = allowed.begin(), end = allowed.end(); - itr != end; ++itr, ++ai) { + /// \brief Given a solved PBQP problem maps this solution back to a register + /// assignment. + bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, + const PBQP::Solution &solution); - unsigned pReg = *itr; + /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// variables. + void finalizeAlloc() const; - CoalesceMap::const_iterator cmItr = - coalesces.find(RegPair(vReg, pReg)); +}; - // No coalesce - on to the next preg. - if (cmItr == coalesces.end()) - continue; +char RegAllocPBQP::ID = 0; - // We have a coalesce - insert the benefit. - v[ai + 1] = -cmItr->second; - } +} // End anonymous namespace. 
- return v; +unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { + Node2VReg::const_iterator vregItr = node2VReg.find(node); + assert(vregItr != node2VReg.end() && "No vreg for node."); + return vregItr->second; } -template <typename RegContainer> -PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix( - const RegContainer &allowed1, const RegContainer &allowed2) const { - - typedef typename RegContainer::const_iterator RegContainerIterator; - - // Construct a PBQP matrix representing the cost of allocation options. The - // rows and columns correspond to the allocation options for the two live - // intervals. Elements will be infinite where corresponding registers alias, - // since we cannot allocate aliasing registers to interfering live intervals. - // All other elements (non-aliasing combinations) will have zero cost. Note - // that the spill option (element 0,0) has zero cost, since we can allocate - // both intervals to memory safely (the cost for each individual allocation - // to memory is accounted for by the cost vectors for each live interval). - PBQP::Matrix *m = - new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); - - // Assume this is a zero matrix until proven otherwise. Zero matrices occur - // between interfering live ranges with non-overlapping register sets (e.g. - // non-overlapping reg classes, or disjoint sets of allowed regs within the - // same class). The term "overlapping" is used advisedly: sets which do not - // intersect, but contain registers which alias, will have non-zero matrices. - // We optimize zero matrices away to improve solver speed. - bool isZeroMatrix = true; - - - // Row index. Starts at 1, since the 0th row is for the spill option, which - // is always zero. - unsigned ri = 1; - - // Iterate over allowed sets, insert infinities where required. - for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end(); - a1Itr != a1End; ++a1Itr) { - - // Column index, starts at 1 as for row index. - unsigned ci = 1; - unsigned reg1 = *a1Itr; - - for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end(); - a2Itr != a2End; ++a2Itr) { - - unsigned reg2 = *a2Itr; - - // If the row/column regs are identical or alias insert an infinity. - if (tri->regsOverlap(reg1, reg2)) { - (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity(); - isZeroMatrix = false; - } - - ++ci; - } - - ++ri; - } - - // If this turns out to be a zero matrix... - if (isZeroMatrix) { - // free it and return null. - delete m; - return 0; - } - - // ...otherwise return the cost matrix. - return m; +PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { + VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); + assert(nodeItr != vreg2Node.end() && "No node for vreg."); + return nodeItr->second; + } -template <typename RegContainer> -PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix( - const RegContainer &allowed1, const RegContainer &allowed2, - PBQP::PBQPNum cBenefit) const { - - typedef typename RegContainer::const_iterator RegContainerIterator; - - // Construct a PBQP Matrix representing the benefits of coalescing. As with - // interference matrices the rows and columns represent allowed registers - // for the LiveIntervals which are (potentially) to be coalesced. The amount - // -cBenefit will be placed in any element representing the same register - // for both intervals. - PBQP::Matrix *m = - new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); - - // Reset costs to zero. 
- m->reset(0); - - // Assume the matrix is zero till proven otherwise. Zero matrices will be - // optimized away as in the interference case. - bool isZeroMatrix = true; - - // Row index. Starts at 1, since the 0th row is for the spill option, which - // is always zero. - unsigned ri = 1; - - // Iterate over the allowed sets, insert coalescing benefits where - // appropriate. - for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end(); - a1Itr != a1End; ++a1Itr) { - - // Column index, starts at 1 as for row index. - unsigned ci = 1; - unsigned reg1 = *a1Itr; - - for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end(); - a2Itr != a2End; ++a2Itr) { - - // If the row and column represent the same register insert a beneficial - // cost to preference this allocation - it would allow us to eliminate a - // move instruction. - if (reg1 == *a2Itr) { - (*m)[ri][ci] = -cBenefit; - isZeroMatrix = false; - } - - ++ci; - } - - ++ri; - } - - // If this turns out to be a zero matrix... - if (isZeroMatrix) { - // ...free it and return null. - delete m; - return 0; - } - - return m; +const PBQPRAProblem::AllowedSet& + PBQPRAProblem::getAllowedSet(unsigned vreg) const { + AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg); + assert(allowedSetItr != allowedSets.end() && "No pregs for vreg."); + const AllowedSet &allowedSet = allowedSetItr->second; + return allowedSet; } -PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { - - typedef MachineFunction::const_iterator MFIterator; - typedef MachineBasicBlock::const_iterator MBBIterator; - typedef LiveInterval::const_vni_iterator VNIIterator; +unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { + assert(isPRegOption(vreg, option) && "Not a preg option."); - CoalesceMap coalescesFound; + const AllowedSet& allowedSet = getAllowedSet(vreg); + assert(option <= allowedSet.size() && "Option outside allowed set."); + return allowedSet[option - 1]; +} - // To find coalesces we need to iterate over the function looking for - // copy instructions. - for (MFIterator bbItr = mf->begin(), bbEnd = mf->end(); - bbItr != bbEnd; ++bbItr) { +std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, + const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { - const MachineBasicBlock *mbb = &*bbItr; + typedef std::vector<const LiveInterval*> LIVector; - for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end(); - iItr != iEnd; ++iItr) { + MachineRegisterInfo *mri = &mf->getRegInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - const MachineInstr *instr = &*iItr; + std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + PBQP::Graph &g = p->getGraph(); + RegSet pregs; - // If this isn't a copy then continue to the next instruction. - if (!instr->isCopy()) - continue; - - unsigned srcReg = instr->getOperand(1).getReg(); - unsigned dstReg = instr->getOperand(0).getReg(); + // Collect the set of preg intervals, record that they're used in the MF. + for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { + if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { + pregs.insert(itr->first); + mri->setPhysRegUsed(itr->first); + } + } - // If the registers are already the same our job is nice and easy. - if (dstReg == srcReg) - continue; + BitVector reservedRegs = tri->getReservedRegs(*mf); + + // Iterate over vregs. 
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); + vregItr != vregEnd; ++vregItr) { + unsigned vreg = *vregItr; + const TargetRegisterClass *trc = mri->getRegClass(vreg); + const LiveInterval *vregLI = &lis->getInterval(vreg); + + // Compute an initial allowed set for the current vreg. + typedef std::vector<unsigned> VRAllowed; + VRAllowed vrAllowed; + for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf), + aoEnd = trc->allocation_order_end(*mf); + aoItr != aoEnd; ++aoItr) { + unsigned preg = *aoItr; + if (!reservedRegs.test(preg)) { + vrAllowed.push_back(preg); + } + } - bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg), - dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg); + // Remove any physical registers which overlap. + for (RegSet::const_iterator pregItr = pregs.begin(), + pregEnd = pregs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; + const LiveInterval *pregLI = &lis->getInterval(preg); - // If both registers are physical then we can't coalesce. - if (srcRegIsPhysical && dstRegIsPhysical) + if (pregLI->empty()) { continue; + } - // If it's a copy that includes two virtual register but the source and - // destination classes differ then we can't coalesce. - if (!srcRegIsPhysical && !dstRegIsPhysical && - mri->getRegClass(srcReg) != mri->getRegClass(dstReg)) + if (!vregLI->overlaps(*pregLI)) { continue; - - // If one is physical and one is virtual, check that the physical is - // allocatable in the class of the virtual. - if (srcRegIsPhysical && !dstRegIsPhysical) { - const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg); - if (std::find(dstRegClass->allocation_order_begin(*mf), - dstRegClass->allocation_order_end(*mf), srcReg) == - dstRegClass->allocation_order_end(*mf)) - continue; } - if (!srcRegIsPhysical && dstRegIsPhysical) { - const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg); - if (std::find(srcRegClass->allocation_order_begin(*mf), - srcRegClass->allocation_order_end(*mf), dstReg) == - srcRegClass->allocation_order_end(*mf)) - continue; - } - - // If we've made it here we have a copy with compatible register classes. - // We can probably coalesce, but we need to consider overlap. - const LiveInterval *srcLI = &lis->getInterval(srcReg), - *dstLI = &lis->getInterval(dstReg); - if (srcLI->overlaps(*dstLI)) { - // Even in the case of an overlap we might still be able to coalesce, - // but we need to make sure that no definition of either range occurs - // while the other range is live. + // Remove the register from the allowed set. + VRAllowed::iterator eraseItr = + std::find(vrAllowed.begin(), vrAllowed.end(), preg); - // Otherwise start by assuming we're ok. - bool badDef = false; - - // Test all defs of the source range. - for (VNIIterator - vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end(); - vniItr != vniEnd; ++vniItr) { + if (eraseItr != vrAllowed.end()) { + vrAllowed.erase(eraseItr); + } - // If we find a poorly defined def we err on the side of caution. - if (!(*vniItr)->def.isValid()) { - badDef = true; - break; - } + // Also remove any aliases. + const unsigned *aliasItr = tri->getAliasSet(preg); + if (aliasItr != 0) { + for (; *aliasItr != 0; ++aliasItr) { + VRAllowed::iterator eraseItr = + std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr); - // If we find a def that kills the coalescing opportunity then - // record it and break from the loop. 
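The allowed-set computation above follows the register class allocation order, drops reserved registers, and then removes any physical register (and, via getAliasSet, its aliases) whose live interval overlaps the vreg. A self-contained sketch with the LLVM queries replaced by plain containers; computeAllowed, Reserved and Clobbered are illustrative names, and the alias expansion is assumed to be folded into Clobbered:

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    // Drop Reg from the allowed vector if present.
    static void dropReg(std::vector<unsigned> &Allowed, unsigned Reg) {
      Allowed.erase(std::remove(Allowed.begin(), Allowed.end(), Reg),
                    Allowed.end());
    }

    // AllocOrder: the register class allocation order.
    // Reserved:   registers reserved by the target.
    // Clobbered:  pregs (already expanded with their aliases) whose live
    //             interval overlaps the vreg being built.
    std::vector<unsigned> computeAllowed(const std::vector<unsigned> &AllocOrder,
                                         const std::set<unsigned> &Reserved,
                                         const std::set<unsigned> &Clobbered) {
      std::vector<unsigned> Allowed;
      for (size_t i = 0; i != AllocOrder.size(); ++i)
        if (!Reserved.count(AllocOrder[i]))      // skip reserved registers
          Allowed.push_back(AllocOrder[i]);
      for (std::set<unsigned>::const_iterator I = Clobbered.begin(),
           E = Clobbered.end(); I != E; ++I)
        dropReg(Allowed, *I);                    // overlapping preg or alias
      return Allowed;
    }

    int main() {
      std::vector<unsigned> Order;
      Order.push_back(1); Order.push_back(2); Order.push_back(3); Order.push_back(4);
      std::set<unsigned> Reserved;  Reserved.insert(4);   // e.g. a stack pointer
      std::set<unsigned> Clobbered; Clobbered.insert(2);  // live preg overlapping the vreg
      std::vector<unsigned> Allowed = computeAllowed(Order, Reserved, Clobbered);
      assert(Allowed.size() == 2 && Allowed[0] == 1 && Allowed[1] == 3);
      return 0;
    }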
- if (dstLI->liveAt((*vniItr)->def)) { - badDef = true; - break; + if (eraseItr != vrAllowed.end()) { + vrAllowed.erase(eraseItr); } } + } + } - // If we have a bad def give up, continue to the next instruction. - if (badDef) - continue; - - // Otherwise test definitions of the destination range. - for (VNIIterator - vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end(); - vniItr != vniEnd; ++vniItr) { + // Construct the node. + PBQP::Graph::NodeItr node = + g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); - // We want to make sure we skip the copy instruction itself. - if ((*vniItr)->getCopy() == instr) - continue; + // Record the mapping and allowed set in the problem. + p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end()); - if (!(*vniItr)->def.isValid()) { - badDef = true; - break; - } + PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? + vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min(); - if (srcLI->liveAt((*vniItr)->def)) { - badDef = true; - break; - } - } + addSpillCosts(g.getNodeCosts(node), spillCost); + } - // As before a bad def we give up and continue to the next instr. - if (badDef) - continue; + for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); + vr1Itr != vrEnd; ++vr1Itr) { + unsigned vr1 = *vr1Itr; + const LiveInterval &l1 = lis->getInterval(vr1); + const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); + + for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr); + vr2Itr != vrEnd; ++vr2Itr) { + unsigned vr2 = *vr2Itr; + const LiveInterval &l2 = lis->getInterval(vr2); + const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); + + assert(!l2.empty() && "Empty interval in vreg set?"); + if (l1.overlaps(l2)) { + PBQP::Graph::EdgeItr edge = + g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), + PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); + + addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri); } - - // If we make it to here then either the ranges didn't overlap, or they - // did, but none of their definitions would prevent us from coalescing. - // We're good to go with the coalesce. - - float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0; - - coalescesFound[RegPair(srcReg, dstReg)] = cBenefit; - coalescesFound[RegPair(dstReg, srcReg)] = cBenefit; } - } - return coalescesFound; + return p; } -void PBQPRegAlloc::findVRegIntervalsToAlloc() { - - // Iterate over all live ranges. - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - - // Ignore physical ones. - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) - continue; - - LiveInterval *li = itr->second; - - // If this live interval is non-empty we will use pbqp to allocate it. - // Empty intervals we allocate in a simple post-processing stage in - // finalizeAlloc. 
- if (!li->empty()) { - vregIntervalsToAlloc.insert(li); - } - else { - emptyVRegIntervals.insert(li); - } - } +void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, + PBQP::PBQPNum spillCost) { + costVec[0] = spillCost; } -PBQP::Graph PBQPRegAlloc::constructPBQPProblem() { - - typedef std::vector<const LiveInterval*> LIVector; - typedef std::vector<unsigned> RegVector; +void PBQPBuilder::addInterferenceCosts( + PBQP::Matrix &costMat, + const PBQPRAProblem::AllowedSet &vr1Allowed, + const PBQPRAProblem::AllowedSet &vr2Allowed, + const TargetRegisterInfo *tri) { + assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch."); + assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch."); - // This will store the physical intervals for easy reference. - LIVector physIntervals; + for (unsigned i = 0; i != vr1Allowed.size(); ++i) { + unsigned preg1 = vr1Allowed[i]; - // Start by clearing the old node <-> live interval mappings & allowed sets - li2Node.clear(); - node2LI.clear(); - allowedSets.clear(); - - // Populate physIntervals, update preg use: - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { + for (unsigned j = 0; j != vr2Allowed.size(); ++j) { + unsigned preg2 = vr2Allowed[j]; - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { - physIntervals.push_back(itr->second); - mri->setPhysRegUsed(itr->second->reg); + if (tri->regsOverlap(preg1, preg2)) { + costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); + } } } +} - // Iterate over vreg intervals, construct live interval <-> node number - // mappings. - for (LiveIntervalSet::const_iterator - itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end(); - itr != end; ++itr) { - const LiveInterval *li = *itr; - - li2Node[li] = node2LI.size(); - node2LI.push_back(li); - } - - // Get the set of potential coalesces. - CoalesceMap coalesces; - - if (pbqpCoalescing) { - coalesces = findCoalesces(); - } - - // Construct a PBQP solver for this problem - PBQP::Graph problem; - problemNodes.resize(vregIntervalsToAlloc.size()); - - // Resize allowedSets container appropriately. - allowedSets.resize(vregIntervalsToAlloc.size()); - - BitVector ReservedRegs = tri->getReservedRegs(*mf); - - // Iterate over virtual register intervals to compute allowed sets... - for (unsigned node = 0; node < node2LI.size(); ++node) { - - // Grab pointers to the interval and its register class. - const LiveInterval *li = node2LI[node]; - const TargetRegisterClass *liRC = mri->getRegClass(li->reg); +std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( + MachineFunction *mf, + const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { - // Start by assuming all allocable registers in the class are allowed... - RegVector liAllowed; - TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf); - TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf); - for (TargetRegisterClass::iterator it = aob; it != aoe; ++it) - if (!ReservedRegs.test(*it)) - liAllowed.push_back(*it); + std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs); + PBQP::Graph &g = p->getGraph(); - // Eliminate the physical registers which overlap with this range, along - // with all their aliases. 
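For reference, a tiny standalone model of the node cost layout used by addSpillCosts and getPRegForOption above: index 0 carries the spill cost (the interval weight, or the smallest positive value when the weight is zero) and index k (k >= 1) stands for allowedSet[k-1]. Plain C++, no LLVM types; the register numbers and weight are made up:

    #include <cassert>
    #include <limits>
    #include <vector>

    int main() {
      // Allowed set for some vreg: {R1, R2, R3}; interval weight 4.5.
      std::vector<unsigned> Allowed;
      Allowed.push_back(1); Allowed.push_back(2); Allowed.push_back(3);
      double Weight = 4.5;

      // Cost vector has |Allowed| + 1 entries; entry 0 carries the spill cost.
      std::vector<double> Costs(Allowed.size() + 1, 0.0);
      Costs[0] = (Weight != 0.0) ? Weight
                                 : std::numeric_limits<double>::min();

      // Option k (k >= 1) selects Allowed[k - 1].
      unsigned Option = 2;
      assert(Allowed[Option - 1] == 2u);
      return 0;
    }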
- for (LIVector::iterator pItr = physIntervals.begin(), - pEnd = physIntervals.end(); pItr != pEnd; ++pItr) { + const TargetMachine &tm = mf->getTarget(); + CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo()); - if (!li->overlaps(**pItr)) - continue; + // Scan the machine function and add a coalescing cost whenever CoalescerPair + // gives the Ok. + for (MachineFunction::const_iterator mbbItr = mf->begin(), + mbbEnd = mf->end(); + mbbItr != mbbEnd; ++mbbItr) { + const MachineBasicBlock *mbb = &*mbbItr; - unsigned pReg = (*pItr)->reg; - - // If we get here then the live intervals overlap, but we're still ok - // if they're coalescable. - if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end()) - continue; + for (MachineBasicBlock::const_iterator miItr = mbb->begin(), + miEnd = mbb->end(); + miItr != miEnd; ++miItr) { + const MachineInstr *mi = &*miItr; - // If we get here then we have a genuine exclusion. + if (!cp.setRegisters(mi)) { + continue; // Not coalescable. + } - // Remove the overlapping reg... - RegVector::iterator eraseItr = - std::find(liAllowed.begin(), liAllowed.end(), pReg); + if (cp.getSrcReg() == cp.getDstReg()) { + continue; // Already coalesced. + } - if (eraseItr != liAllowed.end()) - liAllowed.erase(eraseItr); + unsigned dst = cp.getDstReg(), + src = cp.getSrcReg(); - const unsigned *aliasItr = tri->getAliasSet(pReg); + const float copyFactor = 0.5; // Cost of copy relative to load. Current + // value plucked randomly out of the air. + + PBQP::PBQPNum cBenefit = + copyFactor * LiveIntervals::getSpillWeight(false, true, + loopInfo->getLoopDepth(mbb)); - if (aliasItr != 0) { - // ...and its aliases. - for (; *aliasItr != 0; ++aliasItr) { - RegVector::iterator eraseItr = - std::find(liAllowed.begin(), liAllowed.end(), *aliasItr); + if (cp.isPhys()) { + if (!lis->isAllocatable(dst)) { + continue; + } - if (eraseItr != liAllowed.end()) { - liAllowed.erase(eraseItr); + const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); + unsigned pregOpt = 0; + while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { + ++pregOpt; + } + if (pregOpt < allowed.size()) { + ++pregOpt; // +1 to account for spill option. + PBQP::Graph::NodeItr node = p->getNodeForVReg(src); + addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); + } + } else { + const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); + const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); + PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); + PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); + PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); + if (edge == g.edgesEnd()) { + edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, + allowed2->size() + 1, + 0)); + } else { + if (g.getEdgeNode1(edge) == node2) { + std::swap(node1, node2); + std::swap(allowed1, allowed2); } } + + addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, + cBenefit); } } + } - // Copy the allowed set into a member vector for use when constructing cost - // vectors & matrices, and mapping PBQP solutions back to assignments. - allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end()); + return p; +} - // Set the spill cost to the interval weight, or epsilon if the - // interval weight is zero - PBQP::PBQPNum spillCost = (li->weight != 0.0) ? 
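A rough sense of scale for the coalescing benefit computed above, assuming LiveIntervals::getSpillWeight(isDef, isUse, loopDepth) behaves like (isDef + isUse) * 10^loopDepth (the shape of the contemporaneous implementation; spillWeight below is an assumed stand-in, and copyFactor is the 0.5 constant from the patch):

    #include <cmath>
    #include <cstdio>

    // Assumed shape of LiveIntervals::getSpillWeight.
    static float spillWeight(bool IsDef, bool IsUse, unsigned LoopDepth) {
      return (IsDef + IsUse) * std::pow(10.0F, (float)LoopDepth);
    }

    int main() {
      const float CopyFactor = 0.5F;   // cost of a copy relative to a load
      for (unsigned Depth = 0; Depth <= 3; ++Depth)
        std::printf("loop depth %u -> benefit %g\n",
                    Depth, CopyFactor * spillWeight(false, true, Depth));
      // Prints 0.5, 5, 50, 500: copies in deeper loops earn a much larger
      // negative cost (benefit) on the matching register option.
      return 0;
    }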
- li->weight : std::numeric_limits<PBQP::PBQPNum>::min(); +void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, + unsigned pregOption, + PBQP::PBQPNum benefit) { + costVec[pregOption] += -benefit; +} - // Build a cost vector for this interval. - problemNodes[node] = - problem.addNode( - buildCostVector(li->reg, allowedSets[node], coalesces, spillCost)); +void PBQPBuilderWithCoalescing::addVirtRegCoalesce( + PBQP::Matrix &costMat, + const PBQPRAProblem::AllowedSet &vr1Allowed, + const PBQPRAProblem::AllowedSet &vr2Allowed, + PBQP::PBQPNum benefit) { - } + assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch."); + assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch."); + for (unsigned i = 0; i != vr1Allowed.size(); ++i) { + unsigned preg1 = vr1Allowed[i]; + for (unsigned j = 0; j != vr2Allowed.size(); ++j) { + unsigned preg2 = vr2Allowed[j]; + + if (preg1 == preg2) { + costMat[i + 1][j + 1] += -benefit; + } + } + } +} - // Now add the cost matrices... - for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) { - const LiveInterval *li = node2LI[node1]; - // Test for live range overlaps and insert interference matrices. - for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) { - const LiveInterval *li2 = node2LI[node2]; +void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + //au.addRequiredID(SplitCriticalEdgesID); + au.addRequired<RegisterCoalescer>(); + au.addRequired<CalculateSpillWeights>(); + au.addRequired<LiveStacks>(); + au.addPreserved<LiveStacks>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + if (pbqpPreSplitting) + au.addRequired<LoopSplitter>(); + au.addRequired<VirtRegMap>(); + au.addRequired<RenderMachineFunction>(); + MachineFunctionPass::getAnalysisUsage(au); +} - CoalesceMap::const_iterator cmItr = - coalesces.find(RegPair(li->reg, li2->reg)); +void RegAllocPBQP::findVRegIntervalsToAlloc() { - PBQP::Matrix *m = 0; + // Iterate over all live ranges. + for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { - if (cmItr != coalesces.end()) { - m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], - cmItr->second); - } - else if (li->overlaps(*li2)) { - m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]); - } + // Ignore physical ones. + if (TargetRegisterInfo::isPhysicalRegister(itr->first)) + continue; - if (m != 0) { - problem.addEdge(problemNodes[node1], - problemNodes[node2], - *m); + LiveInterval *li = itr->second; - delete m; - } + // If this live interval is non-empty we will use pbqp to allocate it. + // Empty intervals we allocate in a simple post-processing stage in + // finalizeAlloc. + if (!li->empty()) { + vregsToAlloc.insert(li->reg); + } else { + emptyIntervalVRegs.insert(li->reg); } } - - assert(problem.getNumNodes() == allowedSets.size()); -/* - std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " - << problem.getNumEdges() << " edges.\n"; - - problem.printDot(std::cerr); -*/ - // We're done, PBQP problem constructed - return it. 
- return problem; } -void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, +void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri) { int stackSlot = vrm->getStackSlot(spilled->reg); - if (stackSlot == VirtRegMap::NO_STACK_SLOT) + if (stackSlot == VirtRegMap::NO_STACK_SLOT) { return; + } const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); VNInfo *vni; - if (stackInterval.getNumValNums() != 0) + if (stackInterval.getNumValNums() != 0) { vni = stackInterval.getValNumInfo(0); - else + } else { vni = stackInterval.getNextValue( - SlotIndex(), 0, false, lss->getVNInfoAllocator()); + SlotIndex(), 0, lss->getVNInfoAllocator()); + } LiveInterval &rhsInterval = lis->getInterval(spilled->reg); stackInterval.MergeRangesInAsValue(rhsInterval, vni); } -bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { - +bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, + const PBQP::Solution &solution) { // Set to true if we have any spills bool anotherRoundNeeded = false; // Clear the existing allocation. vrm->clearAllVirt(); - // Iterate over the nodes mapping the PBQP solution to a register assignment. - for (unsigned node = 0; node < node2LI.size(); ++node) { - unsigned virtReg = node2LI[node]->reg, - allocSelection = solution.getSelection(problemNodes[node]); - - - // If the PBQP solution is non-zero it's a physical register... - if (allocSelection != 0) { - // Get the physical reg, subtracting 1 to account for the spill option. - unsigned physReg = allowedSets[node][allocSelection - 1]; - - DEBUG(dbgs() << "VREG " << virtReg << " -> " - << tri->getName(physReg) << "\n"); - - assert(physReg != 0); - - // Add to the virt reg map and update the used phys regs. - vrm->assignVirt2Phys(virtReg, physReg); - } - // ...Otherwise it's a spill. - else { - - // Make sure we ignore this virtual reg on the next round - // of allocation - vregIntervalsToAlloc.erase(&lis->getInterval(virtReg)); - - // Insert spill ranges for this live range - const LiveInterval *spillInterval = node2LI[node]; - double oldSpillWeight = spillInterval->weight; + const PBQP::Graph &g = problem.getGraph(); + // Iterate over the nodes mapping the PBQP solution to a register + // assignment. 
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), + nodeEnd = g.nodesEnd(); + node != nodeEnd; ++node) { + unsigned vreg = problem.getVRegForNode(node); + unsigned alloc = solution.getSelection(node); + + if (problem.isPRegOption(vreg, alloc)) { + unsigned preg = problem.getPRegForOption(vreg, alloc); + DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); + assert(preg != 0 && "Invalid preg selected."); + vrm->assignVirt2Phys(vreg, preg); + } else if (problem.isSpillOption(vreg, alloc)) { + vregsToAlloc.erase(vreg); + const LiveInterval* spillInterval = &lis->getInterval(vreg); + double oldWeight = spillInterval->weight; SmallVector<LiveInterval*, 8> spillIs; rmf->rememberUseDefs(spillInterval); std::vector<LiveInterval*> newSpills = @@ -768,42 +541,42 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { addStackInterval(spillInterval, mri); rmf->rememberSpills(spillInterval, newSpills); - (void) oldSpillWeight; - DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: " - << oldSpillWeight << ", New vregs: "); + (void) oldWeight; + DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " + << oldWeight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. for (std::vector<LiveInterval*>::const_iterator itr = newSpills.begin(), end = newSpills.end(); itr != end; ++itr) { - assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << (*itr)->reg << " "); - - vregIntervalsToAlloc.insert(*itr); + vregsToAlloc.insert((*itr)->reg); } DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. anotherRoundNeeded |= !newSpills.empty(); + } else { + assert(false && "Unknown allocation option."); } } return !anotherRoundNeeded; } -void PBQPRegAlloc::finalizeAlloc() const { + +void RegAllocPBQP::finalizeAlloc() const { typedef LiveIntervals::iterator LIIterator; typedef LiveInterval::Ranges::const_iterator LRIterator; // First allocate registers for the empty intervals. - for (LiveIntervalSet::const_iterator - itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end(); + for (RegSet::const_iterator + itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); itr != end; ++itr) { - LiveInterval *li = *itr; + LiveInterval *li = &lis->getInterval(*itr); unsigned physReg = vrm->getRegAllocPref(li->reg); @@ -828,11 +601,9 @@ void PBQPRegAlloc::finalizeAlloc() const { // Get the physical register for this interval if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { reg = li->reg; - } - else if (vrm->isAssignedReg(li->reg)) { + } else if (vrm->isAssignedReg(li->reg)) { reg = vrm->getPhys(li->reg); - } - else { + } else { // Ranges which are assigned a stack slot only are ignored. continue; } @@ -849,7 +620,7 @@ void PBQPRegAlloc::finalizeAlloc() const { // Find the set of basic blocks which this range is live into... if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { // And add the physreg for this interval to their live-in sets. 
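A minimal model of the selection decoding in mapPBQPToRegAlloc above: selection 0 is the spill option, any other selection k picks allowedSet[k-1], and anything else trips an assertion. The decode function and Assignment enum below are illustrative names, not LLVM API:

    #include <cassert>
    #include <vector>

    enum Assignment { AssignedPhys, Spilled };

    Assignment decode(unsigned Selection, const std::vector<unsigned> &Allowed,
                      unsigned &PhysRegOut) {
      if (Selection == 0)
        return Spilled;                        // spill option
      assert(Selection <= Allowed.size() && "Unknown allocation option.");
      PhysRegOut = Allowed[Selection - 1];     // physical-register option
      return AssignedPhys;
    }

    int main() {
      std::vector<unsigned> Allowed;
      Allowed.push_back(7); Allowed.push_back(9);
      unsigned PReg = 0;
      assert(decode(2, Allowed, PReg) == AssignedPhys && PReg == 9);
      assert(decode(0, Allowed, PReg) == Spilled);
      return 0;
    }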
- for (unsigned i = 0; i < liveInMBBs.size(); ++i) { + for (unsigned i = 0; i != liveInMBBs.size(); ++i) { if (liveInMBBs[i] != entryMBB) { if (!liveInMBBs[i]->isLiveIn(reg)) { liveInMBBs[i]->addLiveIn(reg); @@ -863,7 +634,7 @@ void PBQPRegAlloc::finalizeAlloc() const { } -bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { mf = &MF; tm = &mf->getTarget(); @@ -894,7 +665,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { findVRegIntervalsToAlloc(); // If there are non-empty intervals allocate them using pbqp. - if (!vregIntervalsToAlloc.empty()) { + if (!vregsToAlloc.empty()) { bool pbqpAllocComplete = false; unsigned round = 0; @@ -902,11 +673,13 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - PBQP::Graph problem = constructPBQPProblem(); + std::auto_ptr<PBQPRAProblem> problem = + builder->build(mf, lis, loopInfo, vregsToAlloc); PBQP::Solution solution = - PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem); + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( + problem->getGraph()); - pbqpAllocComplete = mapPBQPToRegAlloc(solution); + pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); ++round; } @@ -917,12 +690,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { rmf->renderMachineFunction("After PBQP register allocation.", vrm); - vregIntervalsToAlloc.clear(); - emptyVRegIntervals.clear(); - li2Node.clear(); - node2LI.clear(); - allowedSets.clear(); - problemNodes.clear(); + vregsToAlloc.clear(); + emptyIntervalVRegs.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); @@ -934,9 +703,18 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { return true; } -FunctionPass* llvm::createPBQPRegisterAllocator() { - return new PBQPRegAlloc(); +FunctionPass* llvm::createPBQPRegisterAllocator( + std::auto_ptr<PBQPBuilder> builder) { + return new RegAllocPBQP(builder); } +FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { + if (pbqpCoalescing) { + return createPBQPRegisterAllocator( + std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing())); + } // else + return createPBQPRegisterAllocator( + std::auto_ptr<PBQPBuilder>(new PBQPBuilder())); +} #undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 02b5539f0f4f..407559a211a0 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -24,7 +24,8 @@ using namespace llvm; // Register the RegisterCoalescer interface, providing a nice name to refer to. 
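The driver in runOnMachineFunction above boils down to: build the problem for the current vreg set, solve it, map the solution back, and repeat while spilling introduced new intervals. A toy sketch of that fixed-point loop; solveOnce is a made-up stand-in for the build/solve/map cycle:

    #include <cstdio>
    #include <set>

    // Stand-in for one build/solve/map cycle: returns true when no vreg had
    // to be spilled (i.e. no new intervals were added to the worklist).
    static bool solveOnce(std::set<unsigned> &VRegsToAlloc) {
      if (VRegsToAlloc.count(2)) {        // pretend vreg 2 doesn't fit this round
        VRegsToAlloc.erase(2);            // it leaves the worklist...
        VRegsToAlloc.insert(20);          // ...and its split ranges re-enter it
        VRegsToAlloc.insert(21);
        return false;
      }
      return true;                        // every node picked a register option
    }

    int main() {
      std::set<unsigned> VRegsToAlloc;
      VRegsToAlloc.insert(1);
      VRegsToAlloc.insert(2);
      unsigned Round = 0;
      bool Complete = false;
      while (!Complete) {                 // mirrors the per-round allocation loop
        std::printf("PBQP Regalloc round %u\n", Round);
        Complete = solveOnce(VRegsToAlloc);
        ++Round;
      }
      return 0;
    }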
-static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer"); +INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", + SimpleRegisterCoalescing) char RegisterCoalescer::ID = 0; // RegisterCoalescer destructor: DO NOT move this to the header file diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index 93426eecbbc1..cbfd5a23d63d 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -30,9 +30,14 @@ using namespace llvm; char RenderMachineFunction::ID = 0; -INITIALIZE_PASS(RenderMachineFunction, "rendermf", +INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf", "Render machine functions (and related info) to HTML pages", - false, false); + false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(RenderMachineFunction, "rendermf", + "Render machine functions (and related info) to HTML pages", + false, false) static cl::opt<std::string> outputFileSuffix("rmf-file-suffix", @@ -458,14 +463,9 @@ namespace llvm { liItr != liEnd; ++liItr) { LiveInterval *li = liItr->second; - const TargetRegisterClass *liTRC; - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) continue; - liTRC = mri->getRegClass(li->reg); - - // For all ranges in the current interal. for (LiveInterval::iterator lrItr = li->begin(), lrEnd = li->end(); diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h index 8d56a8292ac5..85719923c0c6 100644 --- a/lib/CodeGen/RenderMachineFunction.h +++ b/lib/CodeGen/RenderMachineFunction.h @@ -202,7 +202,9 @@ namespace llvm { public: static char ID; - RenderMachineFunction() : MachineFunctionPass(ID) {} + RenderMachineFunction() : MachineFunctionPass(ID) { + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &au) const; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 7d39dc496afe..3388889c9e91 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} +/// getInstrDesc helper to handle SDNodes. +const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); +} + /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -68,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. -void SUnit::addPred(const SDep &D) { +bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) - return; + return false; // Now add a corresponding succ to N. 
SDep P = D; P.setSUnit(this); @@ -99,6 +106,7 @@ void SUnit::addPred(const SDep &D) { this->setDepthDirty(); N->setHeightDirty(); } + return true; } /// removePred - This removes the specified edge as a pred of the current @@ -278,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # preds left : " << NumPredsLeft << "\n"; dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; dbgs() << " Depth : " << Depth << "\n"; dbgs() << " Height : " << Height << "\n"; @@ -492,7 +501,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { /// all nodes affected by the edge insertion. These nodes will later get new /// topological indexes by means of the Shift method. void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, - bool& HasLoop) { + bool &HasLoop) { std::vector<const SUnit*> WorkList; WorkList.reserve(SUnits.size()); diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 0a2fb3796a42..6b7a8c6491bd 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -57,7 +57,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, assert(I->getReg() && "Unknown physical register!"); unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) .addReg(I->getReg()); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ea93dd5c6663..f17023eabb72 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -16,6 +16,7 @@ #include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -32,9 +33,9 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()), - Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { - MFI = mf.getFrameInfo(); + : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), + InstrItins(mf.getTarget().getInstrItineraryData()), + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { DbgValueVec.clear(); } @@ -78,12 +79,12 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } while (1); } -/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject +/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObject(const Value *V) { // First just call Value::getUnderlyingObject to let it do what it does. do { - V = V->getUnderlyingObject(); + V = GetUnderlyingObject(V); // If it found an inttoptr, use special code to continue climing. 
if (Operator::getOpcode(V) != Instruction::IntToPtr) break; @@ -141,6 +142,46 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { } } +/// AddSchedBarrierDeps - Add dependencies from instructions in the current +/// list of instructions being scheduled to scheduling barrier by adding +/// the exit SU to the register defs and use list. This is because we want to +/// make sure instructions which define registers that are either used by +/// the terminator or are live-out are properly scheduled. This is +/// especially important when the definition latency of the return value(s) +/// are too high to be hidden by the branch or when the liveout registers +/// used by instructions in the fallthrough block. +void ScheduleDAGInstrs::AddSchedBarrierDeps() { + MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; + ExitSU.setInstr(ExitMI); + bool AllDepKnown = ExitMI && + (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + if (ExitMI && AllDepKnown) { + // If it's a call or a barrier, add dependencies on the defs and uses of + // instruction. + for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = ExitMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); + Uses[Reg].push_back(&ExitSU); + } + } else { + // For others, e.g. fallthrough, conditional branch, assume the exit + // uses all the registers that are livein to the successor blocks. + SmallSet<unsigned, 8> Seen; + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + if (Seen.insert(Reg)) + Uses[Reg].push_back(&ExitSU); + } + } +} + void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We'll be allocating one SUnit for each instruction, plus one for // the region exit node. @@ -175,6 +216,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // without emitting the info from the previous call. DbgValueVec.clear(); + // Model data dependencies between instructions being scheduled and the + // ExitSU. + AddSchedBarrierDeps(); + // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; MII != MIE; --MII) { @@ -194,6 +239,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); + SU->isCall = TID.isCall(); + SU->isCommutable = TID.isCommutable(); // Assign the Latency field of SU using target-provided information. if (UnitLatencies) @@ -228,6 +275,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned AOLatency = (Kind == SDep::Anti) ? 
0 : 1; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) @@ -237,6 +286,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::vector<SUnit *> &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(*Alias))) @@ -258,12 +309,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // TODO: Perhaps we should get rid of // SpecialAddressLatency and just move this into // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies) { + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { MachineInstr *UseMI = UseSU->getInstr(); const TargetInstrDesc &UseTID = UseMI->getDesc(); int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if ((UseTID.mayLoad() || UseTID.mayStore()) && + if (RegUseIndex >= 0 && + (UseTID.mayLoad() || UseTID.mayStore()) && (unsigned)RegUseIndex < UseTID.getNumOperands() && UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; @@ -357,7 +410,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (TID.isCall() || TID.hasUnmodeledSideEffects() || + if (TID.isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory @@ -446,6 +499,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Treat all other stores conservatively. goto new_alias_chain; } + + if (!ExitSU.isPred(SU)) + // Push store's up a bit to avoid them getting in between cmp + // and branches. + ExitSU.addPred(SDep(SU, SDep::Order, 0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); } else if (TID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; @@ -498,23 +559,22 @@ void ScheduleDAGInstrs::FinishBlock() { } void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - // Compute the latency for the node. - SU->Latency = - InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); + if (!InstrItins || InstrItins->isEmpty()) { + SU->Latency = 1; - // Simplistic target-independent heuristic: assume that loads take - // extra time. - if (InstrItins.isEmpty()) + // Simplistic target-independent heuristic: assume that loads take + // extra time. if (SU->getInstr()->getDesc().mayLoad()) SU->Latency += 2; + } else { + SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); + } } void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) + if (!InstrItins || InstrItins->isEmpty()) return; // For a data dependency with a known register... 
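The no-itinerary fallback in ComputeLatency above is small enough to tabulate: every node costs one cycle, plus two more if it may load. A standalone check; defaultLatency is an illustrative name, not the LLVM function:

    #include <cassert>

    // Stand-in for the no-itinerary path of ScheduleDAGInstrs::ComputeLatency.
    unsigned defaultLatency(bool MayLoad) {
      unsigned Latency = 1;    // every instruction costs at least one cycle
      if (MayLoad)
        Latency += 2;          // simplistic heuristic: loads take extra time
      return Latency;
    }

    int main() {
      assert(defaultLatency(false) == 1);  // e.g. an ALU op
      assert(defaultLatency(true)  == 3);  // e.g. a load
      return 0;
    }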
@@ -528,14 +588,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { - int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), - DefIdx); - if (DefCycle >= 0) { - MachineInstr *UseMI = Use->getInstr(); - const unsigned UseClass = UseMI->getDesc().getSchedClass(); - - // For all uses of the register, calculate the maxmimum latency - int Latency = -1; + const MachineOperand &MO = DefMI->getOperand(DefIdx); + if (MO.isReg() && MO.isImplicit() && + DefIdx >= (int)DefMI->getDesc().getNumOperands()) { + // This is an implicit def, getOperandLatency() won't return the correct + // latency. e.g. + // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> + // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... + // What we want is to compute latency between def of %D6/%D7 and use of + // %Q3 instead. + DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + } + MachineInstr *UseMI = Use->getInstr(); + // For all uses of the register, calculate the maxmimum latency + int Latency = -1; + if (UseMI) { for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = UseMI->getOperand(i); if (!MO.isReg() || !MO.isUse()) @@ -544,15 +611,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, if (MOReg != Reg) continue; - int UseCycle = InstrItins.getOperandCycle(UseClass, i); - if (UseCycle >= 0) - Latency = std::max(Latency, DefCycle - UseCycle + 1); + int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, + UseMI, i); + Latency = std::max(Latency, UseCycle); } - - // If we found a latency, then replace the existing dependence latency. - if (Latency >= 0) - dep.setLatency(Latency); + } else { + // UseMI is null, then it must be a scheduling barrier. + if (!InstrItins || InstrItins->isEmpty()) + return; + unsigned DefClass = DefMI->getDesc().getSchedClass(); + Latency = InstrItins->getOperandCycle(DefClass, DefIdx); } + + // If we found a latency, then replace the existing dependence latency. + if (Latency >= 0) + dep.setLatency(Latency); } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c8f543f7146d..c878287d9c8c 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -101,6 +101,7 @@ namespace llvm { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; const MachineFrameInfo *MFI; + const InstrItineraryData *InstrItins; /// Defs, Uses - Remember where defs and uses of each physical register /// are as we iterate upward through the instructions. This is allocated @@ -163,6 +164,15 @@ namespace llvm { /// input. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// AddSchedBarrierDeps - Add dependencies from instructions in the current + /// list of instructions being scheduled to scheduling barrier. We want to + /// make sure instructions which define registers that are either used by + /// the terminator or are live-out are properly scheduled. This is + /// especially important when the definition latency of the return value(s) + /// are too high to be hidden by the branch or when the liveout registers + /// used by instructions in the fallthrough block. + void AddSchedBarrierDeps(); + /// ComputeLatency - Compute node latency. 
/// virtual void ComputeLatency(SUnit *SU); diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index cbde2b01eeaf..e6d7ded8a784 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -1,4 +1,4 @@ -//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===// +//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===// // // The LLVM Compiler Infrastructure // @@ -7,56 +7,81 @@ // //===----------------------------------------------------------------------===// // -// This implements a hazard recognizer using the instructions itineraries -// defined for the current target. +// This file implements the ScoreboardHazardRecognizer class, which +// encapsultes hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" -#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrItineraries.h" using namespace llvm; -PostRAHazardRecognizer:: -PostRAHazardRecognizer(const InstrItineraryData &LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) { +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + +ScoreboardHazardRecognizer:: +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. 
unsigned ScoreboardDepth = 1; - if (!ItinData.isEmpty()) { + if (ItinData && !ItinData->isEmpty()) { + IssueWidth = ItinData->IssueWidth; + for (unsigned idx = 0; ; ++idx) { - if (ItinData.isEndMarker(idx)) + if (ItinData->isEndMarker(idx)) break; - const InstrStage *IS = ItinData.beginStage(idx); - const InstrStage *E = ItinData.endStage(idx); + const InstrStage *IS = ItinData->beginStage(idx); + const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } - ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + // Find the next power-of-2 >= ItinDepth + while (ItinDepth > ScoreboardDepth) { + ScoreboardDepth *= 2; + } } + MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); RequiredScoreboard.reset(ScoreboardDepth); - DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " << ScoreboardDepth << '\n'); } -void PostRAHazardRecognizer::Reset() { +void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; RequiredScoreboard.reset(); ReservedScoreboard.reset(); } -void PostRAHazardRecognizer::ScoreBoard::dump() const { +void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; unsigned last = Depth - 1; @@ -72,24 +97,46 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const { } } +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + ScheduleHazardRecognizer::HazardType -PostRAHazardRecognizer::getHazardType(SUnit *SU) { - if (ItinData.isEmpty()) +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (!ItinData || ItinData->isEmpty()) return NoHazard; - unsigned cycle = 0; + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { + + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + if (TID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = TID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. 
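A worked example of the scoreboard sizing above: the depth of one itinerary class is the furthest cycle any of its stages reaches (start cycle plus occupied cycles), and the scoreboard is sized to the next power of two at or above the deepest class. Stage, itinDepth and scoreboardDepth below are standalone stand-ins for the InstrStage/InstrItineraryData queries:

    #include <cassert>
    #include <vector>

    struct Stage { unsigned Cycles; unsigned NextCycles; };

    unsigned itinDepth(const std::vector<Stage> &Stages) {
      unsigned CurCycle = 0, Depth = 0;
      for (size_t i = 0; i != Stages.size(); ++i) {
        unsigned StageDepth = CurCycle + Stages[i].Cycles;
        if (Depth < StageDepth) Depth = StageDepth;
        CurCycle += Stages[i].NextCycles;
      }
      return Depth;
    }

    unsigned scoreboardDepth(unsigned MaxItinDepth) {
      unsigned D = 1;                   // always at least one cycle deep
      while (MaxItinDepth > D) D *= 2;  // round up to a power of two
      return D;
    }

    int main() {
      // Three stages: issue (1 cycle), execute (2 cycles, starting next cycle),
      // writeback (2 cycles, starting two cycles after that).
      Stage S[] = { {1, 1}, {2, 2}, {2, 0} };
      std::vector<Stage> Stages(S, S + 3);
      assert(itinDepth(Stages) == 5);    // deepest stage ends at cycle 5
      assert(scoreboardDepth(5) == 8);   // rounded up to the next power of 2
      return 0;
    }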
+ break; + } unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { @@ -97,18 +144,18 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) { assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; + freeUnits &= ~ReservedScoreboard[StageCycle]; // FALLTHROUGH case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; + freeUnits &= ~RequiredScoreboard[StageCycle]; break; } if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); + DEBUG(DAG->dumpNode(SU)); return Hazard; } } @@ -120,17 +167,24 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) { return NoHazard; } -void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { - if (ItinData.isEmpty()) +void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { + if (!ItinData || ItinData->isEmpty()) return; - unsigned cycle = 0; - // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + assert(TID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(TID->Opcode)) + return; + + ++IssueCount; + + unsigned cycle = 0; + + unsigned idx = TID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the // stage is occupied. FIXME it would be more accurate to reserve // the same unit free in all the cycles. 
@@ -174,7 +228,16 @@ void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { DEBUG(RequiredScoreboard.dump()); } -void PostRAHazardRecognizer::AdvanceCycle() { +void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } + +void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; + ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; + ReservedScoreboard.recede(); + RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; + RequiredScoreboard.recede(); +} diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 799988a4c862..15932c03a190 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -21,5 +21,3 @@ add_llvm_library(LLVMSelectionDAG TargetLowering.cpp TargetSelectionDAGInfo.cpp ) - -target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9c4d91e9736..90356021f602 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -43,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); namespace { static cl::opt<bool> @@ -185,7 +185,7 @@ namespace { SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); - SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); @@ -229,12 +229,13 @@ namespace { SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -248,16 +249,19 @@ namespace { bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, SDValue Ptr2, int64_t Size2, const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2) const; + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlignment) const; + unsigned &SrcValueAlignment, + const MDNode *&TBAAInfo) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -270,15 +274,15 @@ namespace { /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); - + SelectionDAG &getDAG() const { return DAG; } - + /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } - + /// isTypeLegal - This method returns true if we are running before type /// legalization or if the specified VT is legal. bool isTypeLegal(const EVT &VT) { @@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.2 "; + DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); @@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, PVT, dl, + return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo(), MemVT, LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } @@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, dl, PVT, Op); - } + } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) @@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo(), MemVT, LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); @@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << "\nReplacing.3 "; + DEBUG(dbgs() << "\nReplacing.3 "; N->dump(&DAG); dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); @@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); - case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); @@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0.getOperand(0).getOperand(1), N0.getOperand(1))); + if (N1.getOpcode() == ISD::AND) { + SDValue AndOp0 = N1.getOperand(0); + ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); + unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + + // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) + // and similar xforms where the inner op is either ~0 or 0. + if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + DebugLoc DL = N->getDebugLoc(); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + // add (sext i1), X -> sub X, (zext i1) + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getValueType() == MVT::i1 && + !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + DebugLoc DL = N->getDebugLoc(); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); + } + return SDValue(); } @@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { if (N->hasNUsesOfValue(0, 1)) return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); // canonicalize constant to RHS. if (N0C && !N1C) @@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); } return SDValue(); @@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { return SDValue(); } +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. 
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + &Ops[0], Ops.size()); + } + return SDValue(); +} + SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return DAG.getConstant(0, N->getValueType(0)); + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (N0C && N0C->isAllOnesValue()) return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold A-(A-B) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) + return N1.getOperand(1); // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); @@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); + // If the type twice as wide is legal, transform the mulhs to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + return SDValue(); } @@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. 
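Aside: the widened-multiply rewrite added to visitMULHS/visitMULHU above computes the high half of an N-bit product by extending both operands to 2N bits, multiplying once, and shifting the result right by N. A host-side sketch for N = 16 (not the LLVM code itself; assumes the usual two's-complement narrowing conversions):

#include <cassert>
#include <cstdint>

// mulhs for i16: sign-extend to i32, multiply, logical-shift right by 16,
// truncate back to i16 -- the same shape as the new DAG expansion.
int16_t mulhs16(int16_t a, int16_t b) {
  int32_t wide = int32_t(a) * int32_t(b);
  return int16_t(uint32_t(wide) >> 16);
}

int main() {
  assert(mulhs16(0x4000, 0x4000) == 0x1000);  // 16384 * 16384 = 2^28
  assert(mulhs16(-1, 1) == -1);               // high half of -1 is all ones
  assert(mulhs16(300, 300) == 1);             // 90000 = 0x15F90, high half is 1
  return 0;
}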
+ if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + return SDValue(); } @@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); if (Res.getNode()) return Res; + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. + Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + return SDValue(); } @@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); if (Res.getNode()) return Res; + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. 
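Aside: the SMUL_LOHI/UMUL_LOHI expansion above derives both result halves from a single widened multiply. An illustrative sketch for the unsigned 32-bit case:

#include <cassert>
#include <cstdint>
#include <utility>

// umul_lohi for i32: zero-extend to i64, multiply once; the low half is a
// truncate, the high half a truncate of the product shifted right by 32.
std::pair<uint32_t, uint32_t> umul_lohi32(uint32_t a, uint32_t b) {
  uint64_t wide = uint64_t(a) * uint64_t(b);
  return { uint32_t(wide), uint32_t(wide >> 32) };   // {lo, hi}
}

int main() {
  auto [lo, hi] = umul_lohi32(0xFFFFFFFFu, 0xFFFFFFFFu);
  assert(lo == 1u);            // (2^32 - 1)^2 = 2^64 - 2^33 + 1
  assert(hi == 0xFFFFFFFEu);
  return 0;
}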
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + return SDValue(); } @@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); - Mask.trunc(N0Op0.getValueSizeInBits()); + Mask = Mask.trunc(N0Op0.getValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), N0.getValueType(), N0Op0); @@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getPointerInfo(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); @@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); @@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } - + // Do not change the width of a volatile load. // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). @@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } AddToWorkList(NewPtr.getNode()); - + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), Alignment); AddToWorkList(N); @@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N01C->getAPIntValue(), VT)); } // fold (xor x, x) -> 0 - if (N0 == N1) { - if (!VT.isVector()) { - return DAG.getConstant(0, VT); - } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ - // Produce a vector of zeros. 
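Aside: the visitAND rewrites above shrink the load feeding an AND with a low-bit mask into a zero-extending load of just the needed bits; the scalar identity that makes this sound is that masking with a low mask is a truncate followed by a zero-extend:

#include <cassert>
#include <cstdint>

// Masking with a low-bit mask is a truncate + zero-extend in disguise,
// which is exactly what a narrower zextload produces.
int main() {
  for (uint32_t x : {0u, 0x12345678u, 0xFFFFFFFFu})
    assert((x & 0xFFFFu) == uint32_t(uint16_t(x)));
  return 0;
}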
- SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &Ops[0], Ops.size()); - } - } + if (N0 == N1) + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { LHS->getOperand(1), N->getOperand(1)); // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + SDValue NewShift = DAG.getNode(N->getOpcode(), + LHS->getOperand(0).getDebugLoc(), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. @@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getSizeInBits()); return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, DAG.getNode(ISD::TRUNCATE, @@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 > OpSizeInBits) + if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) + // For this to be valid, the second form must not preserve any of the bits + // that are shifted out by the inner shift in the first form. This means + // the outer shift size must be >= the number of bits added by the ext. + // As a corollary, we don't care what kind of ext it is. + if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) && + N0.getOperand(0).getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, + DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, + N0.getOperand(0)->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + } + // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or // (srl (and x, (shl -1, c1)), (sub c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL && @@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); + EVT::getIntegerVT(*DAG.getContext(), + OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. 
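Aside: the visitSHL change above tightens the guard from "c1 + c2 > width" to ">=". When the two shift amounts sum to the full bit width, every bit has already been shifted out, so the node must fold to the constant 0 rather than to a single shift (a shift by the full width would be out of range for the type). Concretely:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x80000001u;
  // c1 + c2 < 32: the two shifts fold into one shift by c1 + c2.
  assert(((x << 3) << 4) == (x << 7));
  // c1 + c2 == 32: all bits are gone, so the correct result is 0,
  // which is why the combine now tests c1 + c2 >= OpSizeInBits.
  assert(((x << 16) << 16) == 0u);
  return 0;
}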
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + // if c1 is equal to the number of bits the trunc removes + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && + N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { + EVT LargeVT = N0.getOperand(0).getValueType(); + ConstantSDNode *LargeShiftAmt = + cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + + if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == + LargeShiftAmt->getZExtValue()) { + SDValue Amt = + DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), + getShiftAmountTy()); + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + N0.getOperand(0).getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + } + } + // Simplify, based on bits shifted out of the LHS. if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 > OpSizeInBits) + if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } - + + // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) + if (N1C && N0.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
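Aside: the new visitSRA fold above merges the outer arithmetic shift with the shift the truncate sits on, provided the truncate drops exactly the bits that inner shift brought in. Host-side sketch for a 64-bit value shifted right by 32 and truncated to 32 bits (assumes arithmetic right shift of signed values and two's-complement narrowing, both guaranteed since C++20):

#include <cassert>
#include <cstdint>

// (sra (trunc i32 (srl i64 x, 32)), c) == (trunc i32 (sra i64 x, 32 + c))
// because the truncate removes exactly the 32 bits the srl shifted in.
int32_t narrow_then_sra(uint64_t x, unsigned c) {
  return int32_t(uint32_t(x >> 32)) >> c;
}
int32_t sra_then_narrow(uint64_t x, unsigned c) {
  return int32_t(int64_t(x) >> (32 + c));
}

int main() {
  for (uint64_t x : {0x8000000012345678ull, 0x0123456789abcdefull})
    for (unsigned c : {0u, 1u, 7u, 31u})
      assert(narrow_then_sra(x, c) == sra_then_narrow(x, c));
  return 0;
}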
+ if (c1 + OpSizeInBits == InnerShiftSize) { + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, + DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + N0.getOperand(0)->getOperand(0), + DAG.getConstant(c1 + c2, ShiftCountVT))); + } + } + // fold (srl (shl x, c), c) -> (and x, cst2) if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && N0.getValueSizeInBits() <= 64) { @@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(~0ULL >> ShAmt, VT)); } - + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getSizeInBits()); return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // brcond i32 %c ... // // into - // + // // %a = ... // %b = and %a, 2 // %c = setcc eq %b, 0 @@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } if (BothLiveOut) // Both unextended and extended values are live out. There had better be - // good a reason for the transformation. + // a good reason for the transformation. return ExtendNodes.size(); } return true; @@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); - } + } // fold (sext x) -> (zext x) if the sign bit is known zero. 
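Aside: for the neighbouring "(srl (shl x, c), c) -> (and x, cst2)" fold, the mask is simply ~0 shifted right by the same amount — shifting left and then back right only clears the top c bits:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xfedcba9876543210ull;
  for (unsigned c : {1u, 8u, 33u})
    assert(((x << c) >> c) == (x & (~0ull >> c)));
  return 0;
}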
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); + Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, DAG.getConstant(Mask, VT)); } @@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { + SDValue ShAmt = N0.getOperand(1); + unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); if (N0.getOpcode() == ISD::SHL) { + SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. 
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - - N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); - if (ShAmt > KnownZeroBits) + unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - + InnerZExt.getOperand(0).getValueType().getSizeInBits(); + if (ShAmtVal > KnownZeroBits) return SDValue(); } - DebugLoc dl = N->getDebugLoc(); - return DAG.getNode(N0.getOpcode(), dl, VT, - DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), - DAG.getNode(ISD::ZERO_EXTEND, dl, - N0.getOperand(1).getValueType(), - N0.getOperand(1))); + + DebugLoc DL = N->getDebugLoc(); + + // Ensure that the shift amount is wide enough for the shifted value. + if (VT.getSizeInBits() >= 256) + ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); + + return DAG.getNode(N0.getOpcode(), DL, VT, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), + ShAmt); } return SDValue(); @@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); + Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, DAG.getConstant(Mask, VT)); } @@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, - N->getDebugLoc(), - LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) - return SDValue(); } else if (Opc == ISD::SRL) { - // Annother special-case: SRL is basically zero-extending a narrower - // value. + // Another special-case: SRL is basically zero-extending a narrower value. ExtType = ISD::ZEXTLOAD; N0 = SDValue(N, 0); ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). 
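Aside: the rewritten guard above (ShAmtVal > KnownZeroBits) skips the transform whenever the inner shift could push bits out of the narrow type, because the wide form would have kept them. With an i8 value zero-extended to i16, shifted, then zero-extended to i32, there are 8 known-zero bits to shift into:

#include <cassert>
#include <cstdint>

// zext32(shl16(zext16(x), c)) vs. shl32(zext32(x), c) for an 8-bit x:
// the two agree only while c does not exceed the 8 zero bits the inner
// zero-extension introduced.
uint32_t narrow_shift(uint8_t x, unsigned c) { return uint32_t(uint16_t(uint16_t(x) << c)); }
uint32_t wide_shift(uint8_t x, unsigned c)   { return uint32_t(x) << c; }

int main() {
  uint8_t x = 0x81;
  assert(narrow_shift(x, 8) == wide_shift(x, 8));   // c <= 8: safe to reassociate
  assert(narrow_shift(x, 9) != wide_shift(x, 9));   // c > 8: the i16 shift loses a bit
  return 0;
}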
+ if (!ExtVT.isRound()) + return SDValue(); + unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? @@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } + + // At this point, we must have a load or else we can't do the transform. + if (!isa<LoadSDNode>(N0)) return SDValue(); + + // If the shift amount is larger than the input type then we're not + // accessing any of the loaded bytes. If the load was a zextload/extload + // then the result of the shift+trunc is zero/undef (handled elsewhere). + // If the load was a sextload then the result is a splat of the sign bit + // of the extended byte. This is not worth optimizing for. + if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) + return SDValue(); } } - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && - // Do not change the width of a volatile load. - !cast<LoadSDNode>(N0)->isVolatile()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT PtrType = N0.getOperand(1).getValueType(); - - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. - if (TLI.isBigEndian()) { - unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); - ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; - } - - uint64_t PtrOff = ShAmt / 8; - unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), - PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); - AddToWorkList(NewPtr.getNode()); - - SDValue Load = (ExtType == ISD::NON_EXTLOAD) - ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); - - // Replace the old load's chain with the new load's chain. - WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), - &DeadNodes); + // If the load is shifted left (and the result isn't shifted back right), + // we can fold the truncate through the shift. + unsigned ShLeftAmt = 0; + if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShLeftAmt = N01->getZExtValue(); + N0 = N0.getOperand(0); + } + } + + // If we haven't found a load, we can't narrow it. Don't transform one with + // multiple uses, this would require adding a new load. + if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || + // Don't change the width of a volatile load. + cast<LoadSDNode>(N0)->isVolatile()) + return SDValue(); + + // Verify that we are actually reducing a load width here. 
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load; + if (ExtType == ISD::NON_EXTLOAD) + Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + else + Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); - // Return the new loaded value. - return Load; + // Shift the result left, if we've swallowed a left shift. + SDValue Result = Load; + if (ShLeftAmt != 0) { + EVT ShImmTy = getShiftAmountTy(); + if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + ShImmTy = VT; + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } - return SDValue(); + // Return the new loaded value. + return Result; } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { @@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); - if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + LD1->getPointerInfo().getAddrSpace() != + LD2->getPointerInfo().getAddrSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -4313,14 +4562,14 @@ 
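Aside: ReduceLoadWidth, as reworked above, replaces "load wide, shift right by ShAmt, keep the low bits" with one narrow load at byte offset ShAmt/8; on big-endian targets the shift amount is first mirrored via ShAmt = LVTStoreBits - EVTStoreBits - ShAmt. A host-side sketch of the little-endian case (the assert assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Bytes of the 32-bit value 0xcafef00d as a little-endian host stores them.
  unsigned char mem[4] = {0x0d, 0xf0, 0xfe, 0xca};

  // Wide form: 32-bit load, shift right by 16, keep the low 16 bits.
  uint32_t wide;
  std::memcpy(&wide, mem, 4);
  uint32_t via_shift = (wide >> 16) & 0xFFFFu;

  // Narrow form: a single 16-bit load at byte offset 16 / 8 = 2.
  uint16_t narrow;
  std::memcpy(&narrow, mem + 2, 2);

  assert(via_shift == narrow);   // both are 0xcafe on a little-endian host
  return 0;
}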
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), - LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, false, Align); + LD1->getBasePtr(), LD1->getPointerInfo(), + false, false, Align); } return SDValue(); } -SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { +SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) - return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); } // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } // (conv (conv x, t1), t2) -> (conv x, t2) - if (N0.getOpcode() == ISD::BIT_CONVERT) - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::BITCAST) + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { if (Align <= OrigAlign) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), - LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // This often reduces constant pool loads. 
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { - SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); @@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, Cst, DAG.getConstant(~SignBit, VT)); @@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } -/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. @@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), DstEltVT, BV->getOperand(0))); - + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { SDValue Op = BV->getOperand(i); @@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } @@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // same sizes. 
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); - BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); + BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); - SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); + SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. - return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } // Okay, we know the src/dst types are both integers of differing types. @@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). zextOrTrunc(SrcBitSize).zext(DstBitSize); } @@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { continue; } - APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> - getAPIntValue()).zextOrTrunc(SrcBitSize); + APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> + getAPIntValue().zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = APInt(OpVal).trunc(DstBitSize); + APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) + if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, Ops[0]); @@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Int); } } @@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. 
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Int); } } @@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } - SDNode *Trunc = 0; - if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { - // Look past truncate. - Trunc = N1.getNode(); - N1 = N1.getOperand(0); - } + if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || + ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && + (N1.getOperand(0).hasOneUse() && + N1.getOperand(0).getOpcode() == ISD::SRL))) { + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE) { + // Look pass the truncate. + Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } - if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { // Match this pattern so that we can generate simpler code: // // %a = ... @@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // brcond i32 %c ... // // into - // + // // %a = ... // %b = and i32 %a, 2 // %c = setcc eq %b, 0 @@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } } } + + if (Trunc) + // Restore N1 if the above transformation doesn't match. + N1 = N->getOperand(1); } - + // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { @@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal = true; } - SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1; - - EVT SetCCVT = NodeToReplace.getValueType(); + EVT SetCCVT = N1.getValueType(); if (LegalTypes) SetCCVT = TLI.getSetCCResultType(SetCCVT); SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), @@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes); - removeFromWorkList(NodeToReplace.getNode()); - DAG.DeleteNode(NodeToReplace.getNode()); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), MVT::Other, Chain, SetCC, N2); } @@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), - N->getDebugLoc(), - Chain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. 
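Aside: the visitBITCAST/visitFNEG/visitFABS rewrites above all exploit the IEEE-754 layout — negation flips only the sign bit and fabs clears it — so both can be done with an integer xor/and against a sign-bit mask instead of loading a constant-pool value. Host sketch for 32-bit floats (assumes IEEE-754 single precision, as LLVM's f32 is):

#include <cassert>
#include <cstdint>
#include <cstring>

float fneg_via_int(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  bits ^= 0x80000000u;                    // xor with the sign bit
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

float fabs_via_int(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  bits &= ~0x80000000u;                   // and with ~sign bit
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  assert(fneg_via_int(1.5f) == -1.5f);
  assert(fabs_via_int(-2.25f) == 2.25f);
  return 0;
}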
if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), + BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), - LD->getDebugLoc(), - BetterChain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), @@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + // Make sure the new and old chains are cleaned up. AddToWorkList(Token.getNode()); - + // Replace uses with load result and token factor. Don't add users // to work list. return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair<unsigned, unsigned> Result(0, 0); - + // Check for the structure we're looking for. if (V->getOpcode() != ISD::AND || !isa<ConstantSDNode>(V->getOperand(1)) || !ISD::isNormalLoad(V->getOperand(0).getNode())) return Result; - + // Check the chain and pointer. LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. - + // The store should be chained directly to the load or be an operand of a // tokenfactor. if (LD == Chain.getNode()) @@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { } if (!isOk) return Result; } - + // This only handles simple types. if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && @@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. - + // See if we have a continuous run of bits. If so, we have 0*1+0* if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) return Result; @@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. if (V.getValueType() != MVT::i64 && NotMaskLZ) NotMaskLZ -= 64-V.getValueSizeInBits(); - + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; switch (MaskedBytes) { - case 1: - case 2: + case 1: + case 2: case 4: break; default: return Result; // All one mask, or 5-byte mask. } - + // Verify that the first bit starts at a multiple of mask so that the access // is aligned the same as the access width. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; - + Result.first = MaskedBytes; Result.second = NotMaskTZ/8; return Result; @@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, unsigned NumBytes = MaskInfo.first; unsigned ByteShift = MaskInfo.second; SelectionDAG &DAG = DC->getDAG(); - + // Check to see if IVal is all zeros in the part being masked in by the 'or' // that uses this. If not, this is not a replacement. 
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; - + // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type // legalization. MVT VT = MVT::getIntegerVT(NumBytes*8); if (!DC->isTypeLegal(VT)) return 0; - + // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) @@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; - + SDValue Ptr = St->getBasePtr(); if (StOffset) { Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } - + // Truncate down to the new size. IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); - + ++OpsNarrowed; - return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, - St->getSrcValue(), St->getSrcValueOffset()+StOffset, + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), false, false, NewAlign).getNode(); } @@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); - + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the @@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) return SDValue(NewST, 0); - + // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) @@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { Value.getOperand(0), ST,this)) return SDValue(NewST, 0); } - + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && Chain == SDValue(N0.getNode(), 1)) { LoadSDNode *LD = cast<LoadSDNode>(N0); - if (LD->getBasePtr() != Ptr) + if (LD->getBasePtr() != Ptr || + LD->getPointerInfo().getAddrSpace() != + ST->getPointerInfo().getAddrSpace()) return SDValue(); // Find the type to narrow it the load / op / store to. 
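Aside: CheckForMaskedLoad above only fires when the AND mask clears one contiguous, byte-aligned run of 1, 2 or 4 bytes; the pair it returns is that run's size and byte offset, which is what lets the following store be shrunk to just those bytes. A host-side sketch of the same classification for a 32-bit mask (illustrative only; uses the C++20 <bit> helpers):

#include <bit>
#include <cassert>
#include <cstdint>
#include <utility>

// Returns {bytes cleared, byte offset of the cleared run}, or {0, 0} if the
// mask does not clear a single byte-aligned run of 1, 2 or 4 bytes.
std::pair<unsigned, unsigned> classifyMask32(uint32_t mask) {
  uint64_t notMask = uint64_t(uint32_t(~mask));   // the bits the AND clears
  if (notMask == 0) return {0, 0};                // all-ones mask: nothing cleared
  unsigned tz = std::countr_zero(notMask);
  unsigned lz = std::countl_zero(notMask);
  if (tz & 7) return {0, 0};                      // run must start on a byte boundary
  if (std::countr_one(notMask >> tz) + tz + lz != 64)
    return {0, 0};                                // cleared bits must be contiguous
  unsigned bytes = (32 - (lz - 32) - tz) / 8;     // lz was counted against 64 bits
  if (bytes != 1 && bytes != 2 && bytes != 4) return {0, 0};
  if ((tz / 8) % bytes) return {0, 0};            // run must start at a multiple of its size
  return {bytes, tz / 8};
}

int main() {
  assert(classifyMask32(0xFFFF0000u) == std::make_pair(2u, 0u)); // low 2 bytes cleared
  assert(classifyMask32(0xFF00FFFFu) == std::make_pair(1u, 2u)); // byte 2 cleared
  assert(classifyMask32(0xF0FFFFFFu) == std::make_pair(0u, 0u)); // cleared run is only 4 bits
  return 0;
}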
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { DAG.getConstant(PtrOff, Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); AddToWorkList(NewPtr.getNode()); @@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); } +/// TransformFPLoadStorePair - For a given floating point load / store pair, +/// if the load value isn't used by any other operations, then consider +/// transforming the pair to integer load / store operations if the target +/// deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && + Value.hasOneUse() && + Chain == SDValue(Value.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(Value); + EVT VT = LD->getMemoryVT(); + if (!VT.isFloatingPoint() || + VT != ST->getMemoryVT() || + LD->isNonTemporal() || + ST->isNonTemporal() || + LD->getPointerInfo().getAddrSpace() != 0 || + ST->getPointerInfo().getAddrSpace() != 0) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || + !TLI.isOperationLegal(ISD::STORE, IntVT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) + return SDValue(); + + unsigned LDAlign = LD->getAlignment(); + unsigned STAlign = ST->getAlignment(); + const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); + if (LDAlign < ABIAlign || STAlign < ABIAlign) + return SDValue(); + + SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + false, false, LDAlign); + + SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + NewLD, ST->getBasePtr(), + ST->getPointerInfo(), + false, false, STAlign); + + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewST.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++LdStFP2Int; + return NewST; + } + + return SDValue(); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a store of a bit convert, store the input value if the // resultant store does not need a higher alignment than the original. 
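Aside: TransformFPLoadStorePair, added above, turns a floating-point load whose only use is the immediately following store into the equivalent integer load/store pair when the target reports that as profitable. Bit for bit the two forms move the same bytes, as a host-side sketch shows:

#include <cassert>
#include <cstdint>
#include <cstring>

// Copying a double through a 64-bit integer moves exactly the same bytes as
// copying it through a floating-point value.
void copy_via_int(const double *src, double *dst) {
  uint64_t bits;
  std::memcpy(&bits, src, sizeof bits);   // i64 load
  std::memcpy(dst, &bits, sizeof bits);   // i64 store
}

int main() {
  double a = 3.141592653589793, b = 0.0;
  copy_via_int(&a, &b);
  assert(std::memcmp(&a, &b, sizeof a) == 0);
  return 0;
}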
- if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); @@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), + Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), OrigAlign); } @@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), + Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } break; @@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), MVT::i64); return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), + Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { @@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), + Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); - SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, - Ptr, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); @@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align); } } + // Try transforming a pair floating point load / store ops to integer + // load / store ops. + SDValue NewST = TransformFPLoadStorePair(N); + if (NewST.getNode()) + return NewST; + if (CombinerAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. 
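Aside: when a 64-bit integer store is not legal, the visitSTORE code above stores a constant f64 as its two 32-bit halves, low word first, with Lo and Hi swapped for big-endian targets and the pointer advanced by 4 for the second store. A little-endian host sketch of the split (the assert assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const double value = 1.0;                 // bit pattern 0x3FF0000000000000
  uint64_t bits;
  std::memcpy(&bits, &value, 8);

  unsigned char buf[8];
  uint32_t lo = uint32_t(bits);             // stored at offset 0
  uint32_t hi = uint32_t(bits >> 32);       // stored at offset 4
  std::memcpy(buf + 0, &lo, 4);
  std::memcpy(buf + 4, &hi, 4);

  double roundtrip;
  std::memcpy(&roundtrip, buf, 8);
  assert(roundtrip == value);               // holds on a little-endian host
  return 0;
}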
if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits()))) + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits()))) return SDValue(N, 0); } @@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVal.getOpcode() == ISD::UNDEF) return InVec; + EVT VT = InVec.getValueType(); + + // If we can't generate a legal BUILD_VECTOR, exit + if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return SDValue(); + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new // vector with the inserted element. if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { @@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (Elt < Ops.size()) Ops[Elt] = InVal; return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - InVec.getValueType(), &Ops[0], Ops.size()); + VT, &Ops[0], Ops.size()); } - // If the invec is an UNDEF and if EltNo is a constant, create a new + // If the invec is an UNDEF and if EltNo is a constant, create a new // BUILD_VECTOR with undef elements and the inserted element. 
- if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + if (InVec.getOpcode() == ISD::UNDEF && isa<ConstantSDNode>(EltNo)) { - EVT VT = InVec.getValueType(); EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); @@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (Elt < Ops.size()) Ops[Elt] = InVal; return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - InVec.getValueType(), &Ops[0], Ops.size()); + VT, &Ops[0], Ops.size()); } return SDValue(); } @@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(1); if (isa<ConstantSDNode>(EltNo)) { - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; - if (InVec.getOpcode() == ISD::BIT_CONVERT) { + if (InVec.getOpcode() == ISD::BITCAST) { EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Select the input vector, guarding against out of range extract vector. unsigned NumElems = VT.getVectorNumElements(); - int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BIT_CONVERT) + if (InVec.getOpcode() == ISD::BITCAST) InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); @@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) return SDValue(); + // If Idx was -1 above, Elt is going to be -1, so just return undef. + if (Elt == -1) + return DAG.getUNDEF(LN0->getBasePtr().getValueType()); + unsigned Align = LN0->getAlignment(); if (NewLoad) { // Check the resultant load doesn't need a higher alignment than the // original load. 
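[Editorial sketch, not part of the commit.] The INSERT_VECTOR_ELT combine above rewrites a BUILD_VECTOR (or all-undef) operand list with the inserted lane replaced, provided a legal BUILD_VECTOR can be produced. The operand-list rewrite reduced to a tiny hypothetical helper:

#include <vector>

// Conceptual sketch: fold insert_vector_elt into an existing operand list.
// 'ops' stands in for the BUILD_VECTOR operands (or all-undef lanes for UNDEF).
template <typename Value>
std::vector<Value> foldInsertIntoBuildVector(std::vector<Value> ops,
                                             unsigned elt, Value inVal) {
  if (elt < ops.size())
    ops[elt] = inVal;   // overwrite only the inserted lane
  return ops;           // operands of the replacement BUILD_VECTOR
}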
unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); + TLI.getTargetData() + ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) return SDValue(); @@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } SDValue NewPtr = LN0->getBasePtr(); + unsigned PtrOff = 0; + if (Elt) { - unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; + PtrOff = LVT.getSizeInBits() * Elt / 8; EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; @@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), Align); } @@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (ExtIndex > VT.getVectorNumElements()) return SDValue(); - + Mask.push_back(ExtIndex); continue; } @@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // FIXME: implement canonicalizations from DAG.getVectorShuffle() - // If it is a splat, check if the argument vector is a build_vector with - // all scalar elements the same. - if (cast<ShuffleVectorSDNode>(N)->isSplat()) { + // If it is a splat, check if the argument vector is another splat or a + // build_vector with all scalar elements the same. + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to // look though conversions that change things like v4f32 to v2f64. 
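[Editorial sketch, not part of the commit.] When the EXTRACT_VECTOR_ELT hunk above scalarizes the extract into a narrow load, it computes a byte offset from the lane index and mirrors it on big-endian targets. That offset arithmetic in isolation (hypothetical helper, mirroring the PtrOff computation shown above):

// Conceptual sketch of the PtrOff computation used when turning
// extract_vector_elt of a loaded vector into a narrow scalar load.
unsigned laneByteOffset(unsigned eltSizeInBits, unsigned vecSizeInBits,
                        unsigned elt, bool bigEndian) {
  unsigned off = eltSizeInBits * elt / 8;   // little-endian lane offset
  if (bigEndian)
    off = vecSizeInBits / 8 - off;          // mirrored offset on big-endian targets
  return off;
}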
- if (V->getOpcode() == ISD::BIT_CONVERT) { + if (V->getOpcode() == ISD::BITCAST) { SDValue ConvInput = V->getOperand(0); if (ConvInput.getValueType().isVector() && ConvInput.getValueType().getVectorNumElements() == NumElts) @@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } if (V->getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElems = V->getNumOperands(); - unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex(); - if (NumElems > BaseIdx) { - SDValue Base; - bool AllSame = true; - for (unsigned i = 0; i != NumElems; ++i) { - if (V->getOperand(i).getOpcode() != ISD::UNDEF) { - Base = V->getOperand(i); - break; - } + assert(V->getNumOperands() == NumElts && + "BUILD_VECTOR has wrong number of operands"); + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; } - // Splat of <u, u, u, u>, return <u, u, u, u> - if (!Base.getNode()) - return N0; - for (unsigned i = 0; i != NumElems; ++i) { - if (V->getOperand(i) != Base) { - AllSame = false; - break; - } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; } - // Splat of <x, x, x, x>, return <x, x, x, x> - if (AllSame) - return N0; } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; } } return SDValue(); @@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BIT_CONVERT) + if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); if (RHS.getOpcode() == ISD::BUILD_VECTOR) { SmallVector<int, 8> Indices; @@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { DAG.getConstant(0, EltVT)); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), RVT, &ZeroOps[0], ZeroOps.size()); - LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } } @@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - EVT VT = N->getValueType(0); - assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); + assert(N->getValueType(0).isVector() && + "SimplifyVBinOp only works on vectors!"); - EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { break; } - // If the vector element type is not legal, the BUILD_VECTOR operands - // are promoted and implicitly truncated. Make that explicit here. 
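[Editorial sketch, not part of the commit.] The visitVECTOR_SHUFFLE hunk above keeps the rule that a splat of a BUILD_VECTOR whose lanes all match the first defined lane can be replaced by the input vector. A standalone version of that scan, with std::optional standing in for possibly-undef SDValue lanes and int as a placeholder element type:

#include <optional>
#include <vector>

// Conceptual sketch: does this BUILD_VECTOR operand list splat a single value?
// An empty optional models an UNDEF lane.
bool buildVectorIsUniform(const std::vector<std::optional<int>> &lanes) {
  const std::optional<int> *base = nullptr;
  for (const auto &lane : lanes)
    if (lane) { base = &lane; break; }    // first defined lane becomes Base
  if (!base)
    return true;                          // splat of <u, u, u, u>
  for (const auto &lane : lanes)
    if (lane != *base)                    // every lane must match Base
      return false;
  return true;                            // splat of <x, x, x, x>
}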
- if (LHSOp.getValueType() != EltType) - LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp); - if (RHSOp.getValueType() != EltType) - RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp); - - SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType, + EVT VT = LHSOp.getValueType(); + assert(RHSOp.getValueType() == VT && + "SimplifyVBinOp with different BUILD_VECTOR element types"); + SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && @@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { AddToWorkList(FoldOp.getNode()); } - if (Ops.size() == LHS.getNumOperands()) { - EVT VT = LHS.getValueType(); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &Ops[0], Ops.size()); - } + if (Ops.size() == LHS.getNumOperands()) + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + LHS.getValueType(), &Ops[0], Ops.size()); } return SDValue(); @@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { + // Cannot simplify select with vector condition + if (TheSelect->getOperand(0).getValueType().isVector()) return false; + // If this is a select from two identical things, try to pull the operation // through the select. - if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ - // If this is a load and the token chain is identical, replace the select - // of two loads with a load through a select of the address to load from. - // This triggers in things like "select bool X, 10.0, 123.0" after the FP - // constants have been dropped into the constant pool. - if (LHS.getOpcode() == ISD::LOAD && + if (LHS.getOpcode() != RHS.getOpcode() || + !LHS.hasOneUse() || !RHS.hasOneUse()) + return false; + + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // Token chains must be identical. + if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - !cast<LoadSDNode>(LHS)->isVolatile() && - !cast<LoadSDNode>(RHS)->isVolatile() && - // Token chains must be identical. - LHS.getOperand(0) == RHS.getOperand(0)) { - LoadSDNode *LLD = cast<LoadSDNode>(LHS); - LoadSDNode *RLD = cast<LoadSDNode>(RHS); - - // If this is an EXTLOAD, the VT's must match. - if (LLD->getMemoryVT() == RLD->getMemoryVT()) { + LLD->isVolatile() || RLD->isVolatile() || + // If this is an EXTLOAD, the VT's must match. + LLD->getMemoryVT() != RLD->getMemoryVT() || + // If this is an EXTLOAD, the kind of extension must match. + (LLD->getExtensionType() != RLD->getExtensionType() && + // The only exception is if one of the extensions is anyext. + LLD->getExtensionType() != ISD::EXTLOAD && + RLD->getExtensionType() != ISD::EXTLOAD) || // FIXME: this discards src value information. This is // over-conservative. It would be beneficial to be able to remember // both potential memory locations. 
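[Editorial sketch, not part of the commit.] SimplifyVBinOp above folds a vector binop of two BUILD_VECTORs by applying the scalar operation lane by lane, giving up unless every lane folds to UNDEF or a constant. The lane-wise idea as a standalone sketch, using integer addition as the stand-in operation:

#include <cstddef>
#include <cstdint>
#include <vector>

// Conceptual sketch: fold a vector binop of two constant BUILD_VECTORs lane by
// lane; the folded lanes become the operands of a new BUILD_VECTOR.
std::vector<int64_t> foldVectorAddLanes(const std::vector<int64_t> &lhs,
                                        const std::vector<int64_t> &rhs) {
  std::vector<int64_t> folded;
  for (std::size_t i = 0; i < lhs.size() && i < rhs.size(); ++i)
    folded.push_back(lhs[i] + rhs[i]);   // each lane folds to a constant
  return folded;
}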
Since we are discarding // src value info, don't do the transformation if the memory // locations are not in the default address space. - unsigned LLDAddrSpace = 0, RLDAddrSpace = 0; - if (const Value *LLDVal = LLD->getMemOperand()->getValue()) { - if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType())) - LLDAddrSpace = PT->getAddressSpace(); - } - if (const Value *RLDVal = RLD->getMemOperand()->getValue()) { - if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType())) - RLDAddrSpace = PT->getAddressSpace(); - } - SDValue Addr; - if (LLDAddrSpace == 0 && RLDAddrSpace == 0) { - if (TheSelect->getOpcode() == ISD::SELECT) { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if ((!LLD->hasAnyUseOfValue(1) || - !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && - (!RLD->hasAnyUseOfValue(1) || - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); - } - } else { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if ((!LLD->hasAnyUseOfValue(1) || - (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && - (!RLD->hasAnyUseOfValue(1) || - (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), - TheSelect->getOperand(1), - LLD->getBasePtr(), RLD->getBasePtr(), - TheSelect->getOperand(4)); - } - } - } - - if (Addr.getNode()) { - SDValue Load; - if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { - Load = DAG.getLoad(TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - LLD->getChain(), - Addr, 0, 0, - LLD->isVolatile(), - LLD->isNonTemporal(), - LLD->getAlignment()); - } else { - Load = DAG.getExtLoad(LLD->getExtensionType(), - TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - LLD->getChain(), Addr, 0, 0, - LLD->getMemoryVT(), - LLD->isVolatile(), - LLD->isNonTemporal(), - LLD->getAlignment()); - } + LLD->getPointerInfo().getAddrSpace() != 0 || + RLD->getPointerInfo().getAddrSpace() != 0) + return false; - // Users of the select now use the result of the load. - CombineTo(TheSelect, Load); + // Check that the select condition doesn't reach either load. If so, + // folding this will induce a cycle into the DAG. If not, this is safe to + // xform, so create a select of the addresses. 
+ SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + SDNode *CondNode = TheSelect->getOperand(0).getNode(); + if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || + (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) + return false; + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } else { // Otherwise SELECT_CC + SDNode *CondLHS = TheSelect->getOperand(0).getNode(); + SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + + if ((LLD->hasAnyUseOfValue(1) && + (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || + (LLD->hasAnyUseOfValue(1) && + (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + return false; - // Users of the old loads now use the new load's chain. We know the - // old-load value is dead now. - CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); - CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); - return true; - } - } - } + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + // FIXME: Discards pointer info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->isVolatile(), LLD->isNonTemporal(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? + RLD->getExtensionType() : LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + // FIXME: Discards pointer info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; } return false; @@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. if (N2 == N3) return N2; - + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::FABS, DL, VT, N3); } } - + // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 // in it. This is a win when the constant is not otherwise available because @@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, }; const Type *FPTy = Elts[0]->getType(); const TargetData &TD = *TLI.getTargetData(); - + // Create a ConstantArray of the two constants. 
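[Editorial sketch, not part of the commit.] The SimplifySelectOps rewrite above folds a select of two compatible loads into a single load whose address is a select of the two base pointers, once it has checked that the condition does not depend on either load. The transformation in plain scalar terms (a hypothetical analogue, not the DAG code):

// Conceptual sketch: select(cond, load p, load q) -> load(select(cond, p, q)).
int loadOfSelectedAddress(bool cond, const int *p, const int *q) {
  const int *addr = cond ? p : q;   // the new SELECT of the two base pointers
  return *addr;                     // one load replaces both original loads
}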
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), @@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Zero = DAG.getIntPtrConstant(0); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); SDValue One = DAG.getIntPtrConstant(EltSize); - + SDValue Cond = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), N0, N1, CC); @@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } - } + } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) @@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } } + // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) + // where y is has a single bit set. + // A plaintext description would be, we can turn the SELECT_CC into an AND + // when the condition can be materialized as an all-ones register. Any + // single bit-test can be materialized as an all-ones register with + // shift-left and shift-right-arith. + if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && + N0->getValueType(0) == VT && + N1C && N1C->isNullValue() && + N2C && N2C->isNullValue()) { + SDValue AndLHS = N0->getOperand(0); + ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { + // Shift the tested bit over the sign bit. + APInt AndMask = ConstAndRHS->getAPIntValue(); + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy()); + SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is either + // all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy()); + SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } + } + // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { @@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { } /// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself. Provides base object and offset as results. +// to alias with anything but itself. Provides base object and offset as +// results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. @@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, Offset += C->getZExtValue(); } } - + // Return the underlying GlobalValue, and update the Offset. Return false // for GlobalAddressSDNode since the same GlobalAddress may be represented // by multiple nodes with different offsets. 
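[Editorial sketch, not part of the commit.] The new fold added above materializes a single-bit test as an all-ones or all-zero register: shift the tested bit into the sign position, arithmetic-shift it all the way back down, then AND with the selected value. A self-contained 32-bit sketch (helper name hypothetical; the narrowing conversions are benign on the usual two's-complement targets):

#include <cstdint>

// Conceptual sketch of (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A),
// where 'singleBit' is assumed to have exactly one bit set.
uint32_t selectViaAllOnesMask(uint32_t x, uint32_t singleBit, uint32_t a) {
  unsigned shl = 0;
  while (((singleBit << shl) & 0x80000000u) == 0)
    ++shl;                                            // countLeadingZeros(singleBit)
  int32_t shifted = static_cast<int32_t>(x << shl);   // tested bit now in the sign bit
  int32_t mask = shifted >> 31;                       // arithmetic shift: all-ones or zero
  return static_cast<uint32_t>(mask) & a;             // (x & singleBit) ? a : 0
}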
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, SDValue Ptr2, int64_t Size2, const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2) const { + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; @@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); - // If we know what the bases are, and they aren't identical, then we know they - // cannot alias. + // It is possible for different frame indices to alias each other, mostly + // when tail call optimization reuses return address slots for arguments. + // To catch this case, look up the actual index of frame indices to compute + // the real alias relationship. + if (isFrameIndex1 && isFrameIndex2) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); + Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); + return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + } + + // Otherwise, if we know what the bases are, and they aren't identical, then + // we know they cannot alias. if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) return false; @@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, (Size1 == Size2) && (SrcValueAlign1 > Size1)) { int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; - + // There is no overlap between these relatively aligned accesses of similar // size, return no alias. if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) return false; } - + if (CombinerGlobalAA) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); + AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), + AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// node. Returns true if the operand was a load. 
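[Editorial sketch, not part of the commit.] Once isAlias above knows that two accesses share a base object (including two frame indices after their real frame offsets are added in), aliasing reduces to a simple interval-overlap test. That test in isolation (hypothetical helper):

#include <cstdint>

// Conceptual sketch of the overlap check used by isAlias when both accesses
// are known to share the same underlying object.
bool accessesOverlap(int64_t off1, int64_t size1, int64_t off2, int64_t size2) {
  return !((off1 + size1) <= off2 || (off2 + size2) <= off1);
}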
bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, + const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlign) const { + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); Size = LD->getMemoryVT().getSizeInBits() >> 3; SrcValue = LD->getSrcValue(); SrcValueOffset = LD->getSrcValueOffset(); SrcValueAlign = LD->getOriginalAlignment(); + TBAAInfo = LD->getTBAAInfo(); return true; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); @@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); SrcValueAlign = ST->getOriginalAlignment(); + TBAAInfo = ST->getTBAAInfo(); } else { llvm_unreachable("FindAliasInfo expected a memory operand"); } @@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign); + const MDNode *SrcTBAAInfo; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); unsigned Depth = 0; - + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.back(); Chains.pop_back(); - - // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. If we've seen two aliases, assume we'll // find more and revert to original chain since the xform is unlikely to be // profitable. - // - // FIXME: The depth check could be made to return the last non-aliasing + // + // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. if (Depth > 6 || Aliases.size() == 2) { @@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; + const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign); + OpSrcValueAlign, + OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + SrcTBAAInfo, OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign)) { + OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { // Look further up the chain. @@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. 
- return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, &Aliases[0], Aliases.size()); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index a4eed71e65c0..490b857b0e9c 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" using namespace llvm; /// startNewBlock - Set the current block to which generated machine @@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } - + // If target-independent code couldn't handle the value, give target-specific // code a try. if (!Reg && isa<Constant>(V)) Reg = TargetMaterializeConstant(cast<Constant>(V)); - + // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. if (Reg != 0) { @@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { LocalValueMap[I] = Reg; return Reg; } - + unsigned &AssignedReg = FuncInfo.ValueMap[I]; if (AssignedReg == 0) // Use the new register. @@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) @@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { NIsKill = true; continue; } - + // N = N + Idx * ElementSize; uint64_t ElementSize = TD.getTypeAllocSize(Ty); std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); @@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) { return true; const Value *Address = DI->getAddress(); - if (!Address) + if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) return true; - if (isa<UndefValue>(Address)) - return true; - const AllocaInst *AI = dyn_cast<AllocaInst>(Address); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) - // Building the map above is target independent. Generating DBG_VALUE - // inline is target dependent; do this now. - (void)TargetSelectInstruction(cast<Instruction>(I)); + + unsigned Reg = 0; + unsigned Offset = 0; + if (const Argument *Arg = dyn_cast<Argument>(Address)) { + if (Arg->hasByValAttr()) { + // Byval arguments' frame index is recorded during argument lowering. + // Use this info directly. + Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI.getFrameRegister(*FuncInfo.MF); + } + } + if (!Reg) + Reg = getRegForValue(Address); + + if (Reg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset) + .addMetadata(DI->getVariable()); return true; } case Intrinsic::dbg_value: { @@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) { } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - // Insert an undef so we can see what we dropped. 
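[Editorial sketch, not part of the commit.] In the FastISel::SelectGetElementPtr code above, a constant GEP index is folded straight into the running address as alloc-size times index, emitted as one ADD-with-immediate. The arithmetic in isolation (hypothetical helper, not the FastISel API):

#include <cstdint>

// Conceptual sketch: fold one constant GEP index into the address computation.
uint64_t addConstantGepIndex(uint64_t addr, uint64_t typeAllocSize, int64_t idx) {
  uint64_t offs = typeAllocSize * static_cast<uint64_t>(idx);  // Offs above
  return addr + offs;                                          // N = N + Offs
}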
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(0U).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } + DEBUG(dbgs() << "Dropping debug info for " << DI); + } return true; } case Intrinsic::eh_exception: { @@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) { bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; - + // Check if the destination type is legal. Or as a special case, // it may be i1 if we're doing a truncate because that's // easy and somewhat common. @@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { InputReg, InputRegIsKill); if (!ResultReg) return false; - + UpdateValueMap(I, ResultReg); return true; } @@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) { return true; } - // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + // Bitcasts of other values become reg-reg copies or BITCAST operators. EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple() || !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) // Unhandled type. Halt "fast" selection and bail. return false; - + unsigned Op0 = getRegForValue(I->getOperand(0)); if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); - + // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { @@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) { ResultReg).addReg(Op0); } } - - // If the reg-reg copy failed, select a BIT_CONVERT opcode. + + // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BIT_CONVERT, Op0, Op0IsKill); - + ISD::BITCAST, Op0, Op0IsKill); + if (!ResultReg) return false; - + UpdateValueMap(I, ResultReg); return true; } @@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) { return false; unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::BIT_CONVERT, OpReg, OpRegIsKill); + ISD::BITCAST, OpReg, OpRegIsKill); if (IntReg == 0) return false; @@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) { return false; ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); + ISD::BITCAST, IntResultReg, /*Kill=*/true); if (ResultReg == 0) return false; @@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { // Dynamic-sized alloca is not handled yet. 
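[Editorial sketch, not part of the commit.] SelectFNeg above lowers an FP negation by bit-converting to the same-width integer type and back (presumably with the usual sign-bit XOR in between; that step sits outside the changed lines). A standalone sketch of the pattern for f32, assuming IEEE-754 layout:

#include <cstdint>
#include <cstring>

// Conceptual sketch: fneg as BITCAST -> XOR of the sign bit -> BITCAST.
float fnegViaIntegerXor(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);  // the BITCAST to i32
  bits ^= 0x80000000u;                  // flip only the sign bit
  std::memcpy(&f, &bits, sizeof f);     // BITCAST back to f32
  return f;
}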
return false; - + case Instruction::Call: return SelectCall(I); - + case Instruction::BitCast: return SelectBitCast(I); @@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT, return 0; } -unsigned FastISel::FastEmit_rr(MVT, MVT, +unsigned FastISel::FastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/, bool /*Op0IsKill*/, unsigned /*Op1*/, bool /*Op1IsKill*/) { @@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); else { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 5ef6404ee5d6..98582ba99f14 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 61c2a90e7edc..e309defba20f 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -31,11 +31,11 @@ using namespace llvm; /// CountResults - The results of target nodes have register or immediate -/// operands first, then an optional chain, and optional flag operands (which do +/// operands first, then an optional chain, and optional glue operands (which do /// not go into the resulting MachineInstr). unsigned InstrEmitter::CountResults(SDNode *Node) { unsigned N = Node->getNumValues(); - while (N && Node->getValueType(N - 1) == MVT::Flag) + while (N && Node->getValueType(N - 1) == MVT::Glue) --N; if (N && Node->getValueType(N - 1) == MVT::Other) --N; // Skip over chain result. @@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { } /// CountOperands - The inputs to target nodes have any actual inputs first, -/// followed by an optional chain operand, then an optional flag operand. +/// followed by an optional chain operand, then an optional glue operand. /// Compute the number of actual operands that will go into the resulting /// MachineInstr. unsigned InstrEmitter::CountOperands(SDNode *Node) { unsigned N = Node->getNumOperands(); - while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. @@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. 
assert(isNew && "Node emitted out of order - early"); return; } @@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; EVT VT = Node->getValueType(Op.getResNo()); - if (VT == MVT::Other || VT == MVT::Flag) + if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; if (User->isMachineOpcode()) { @@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } } @@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && - Op.getValueType() != MVT::Flag && - "Chain and flag operands should occur at end of operand list!"); + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); @@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, BA->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && - Op.getValueType() != MVT::Flag && - "Chain and flag operands should occur at end of operand list!"); + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } @@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(VReg); - const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); - assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); - - // Figure out the register class to create for the destreg. - // Note that if we're going to directly use an existing register, - // it must be precisely the required class, and not a subclass - // thereof. 
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { - // Create the reg - assert(SRC && "Couldn't find source register class"); - VRBase = MRI->createVirtualRegister(SRC); - } + MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned SrcReg, DstReg, DefSubIdx; + if (DefMI && + TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && + SubIdx == DefSubIdx) { + // Optimize these: + // r1025 = s/zext r1024, 4 + // r1026 = extract_subreg r1025, 4 + // to a copy + // r1026 = copy r1024 + const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg); + VRBase = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + } else { + const TargetRegisterClass *TRC = MRI->getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase); + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); - // Add source, and subreg index - AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, - IsClone, IsCloned); - assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && - "Cannot yet extract from physregs"); - MI->getOperand(1).setSubReg(SubIdx); - MBB->insert(InsertPos, MI); + // Add source, and subreg index + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&& + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); + MBB->insert(InsertPos, MI); + } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); @@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. 
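[Editorial sketch, not part of the commit.] The EmitSubregNode change above recognizes that extracting the original-width subregister of a value that was just sign- or zero-extended is simply a copy of the pre-extension register. In scalar terms (hypothetical function; the widths are chosen only for illustration):

#include <cstdint>

// Conceptual sketch of the peephole documented above:
//   r1025 = zext r1024 ; r1026 = extract_subreg r1025, <low half>
// collapses to
//   r1026 = copy r1024
uint32_t extractLowOfZext(uint32_t r1024) {
  uint64_t r1025 = static_cast<uint64_t>(r1024);  // the zero-extension
  uint32_t r1026 = static_cast<uint32_t>(r1025);  // extract_subreg of the low half
  return r1026;                                   // identical to a plain copy of r1024
}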
assert(isNew && "Node emitted out of order - early"); } @@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (!SRC) - llvm_unreachable("Invalid subregister index in REG_SEQUENCE"); - if (SRC != RC) { + if (SRC && SRC != RC) { MRI->setRegClass(NewVReg, SRC); RC = SRC; } @@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, MBB->insert(InsertPos, MI); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // The MachineInstr constructor adds implicit-def operands. Scan through // these to determine which are dead. if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Flag) { + Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { // First, collect all used registers. SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser()) + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) if (F->getOpcode() == ISD::CopyFromReg) UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); else { @@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { unsigned Reg = R->getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) UsedRegs.push_back(Reg); } } @@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); - if (II.usesCustomInsertionHook()) { - // Insert this instruction into the basic block using a target - // specific inserter which may returns a new basic block. - bool AtEnd = InsertPos == MBB->end(); - MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); - if (NewMBB != MBB) { - if (AtEnd) - InsertPos = NewMBB->end(); - MBB = NewMBB; - } - return; - } - - // Additional results must be an physical register def. + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; @@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); // If there are no uses, mark the register as dead now, so that // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Flag value, for the benefit of targets still using - // Flag for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + // node has a Glue value, for the benefit of targets still using + // Glue for values in physregs. + else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) MI->addRegisterDead(Reg, TRI); } } // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any flag outputs, we don't do this - // because we don't know what implicit defs are being used by flagged nodes. 
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + // implicit def as dead. If the node has any glue outputs, we don't do this + // because we don't know what implicit defs are being used by glued nodes. + if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) if (const unsigned *IDList = II.getImplicitDefs()) { for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); i != e; ++i) @@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), @@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); - // Add the isAlignStack bit. - int64_t isAlignStack = - cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + // Add the HasSideEffect and isAlignStack bits. + int64_t ExtraInfo = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); - MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2981cd3f1cab..49c862ce3e0b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,11 +66,6 @@ class SelectionDAGLegalize { /// against each other, including inserted libcalls. SDValue LastCALLSEQ_END; - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; - enum LegalizeAction { Legal, // The target natively supports this operation. Promote, // This operation should be executed in a larger type. @@ -91,6 +87,9 @@ class SelectionDAGLegalize { // If someone requests legalization of the new node, return itself. if (From != To) LegalizedNodes.insert(std::make_pair(To, To)); + + // Transfer SDDbgValues. 
+ DAG.TransferDbgValues(From, To); } public: @@ -172,6 +171,7 @@ private: SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, void SelectionDAGLegalize::LegalizeDAG() { LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node) { - if (Node->getOpcode() == ISD::CALLSEQ_END) - return Node; +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. + if (Node->getOpcode() == ISD::CALLSEQ_START) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) + return Node; + } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User)) + if (SDNode *Result = FindCallEndFromCallStart(User, depth)) return Result; } return 0; @@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) { /// FindCallStartFromCallEnd - Given a chained node that is part of a call /// sequence, find the CALLSEQ_START node that initiates the call sequence. 
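[Editorial sketch, not part of the commit.] FindCallEndFromCallStart above now counts nested CALLSEQ_START/CALLSEQ_END pairs so that it returns the end node matching the outermost start. The matching logic, reduced to a flat scan over a hypothetical encoding ('S' for CALLSEQ_START, 'E' for CALLSEQ_END, anything else ignored):

#include <cstddef>
#include <string>

// Conceptual sketch: find the CALLSEQ_END that closes the CALLSEQ_START at
// position 'start', honouring nesting.
int findMatchingCallSeqEnd(const std::string &seq, std::size_t start) {
  int depth = 0;
  for (std::size_t i = start; i < seq.size(); ++i) {
    if (seq[i] == 'S')
      ++depth;                       // entering a (possibly nested) call sequence
    else if (seq[i] == 'E' && --depth == 0)
      return static_cast<int>(i);    // this end closes the original start
  }
  return -1;                         // no matching CALLSEQ_END on this chain
}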
static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + int nested = 0; assert(Node && "Didn't find callseq_start for a call??"); - if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; - - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); + while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { + Node = Node->getOperand(0).getNode(); + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + switch (Node->getOpcode()) { + default: + break; + case ISD::CALLSEQ_START: + if (!nested) + return Node; + nested--; + break; + case ISD::CALLSEQ_END: + nested++; + break; + } + } + return 0; } /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, + return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), - CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, false, Alignment); + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, false, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } @@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); - int SVOffset = ST->getSrcValueOffset(); DebugLoc dl = ST->getDebugLoc(); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { @@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Expand to a bitconvert of the value to the integer type of the // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! - SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT, - false, false, 0); + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. 
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getSrcValue(), SVOffset + Offset, + ST->getPointerInfo().getWithOffset(Offset), ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. @@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, 8 * (StoredBytes - Offset)); // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, - NULL, 0, MemVT, false, false, 0); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getSrcValue(), SVOffset + Offset, + ST->getPointerInfo() + .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); @@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Store the two parts SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, - ST->getSrcValue(), SVOffset, NewStoredVT, + ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, - ST->getSrcValue(), SVOffset + IncrementSize, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); @@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, static SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI) { - int SVOffset = LD->getSrcValueOffset(); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isTypeLegal(intVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, LD->isVolatile(), + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, dl); - } else { - // Copy the value to a (aligned) stack slot using (unaligned) integer - // loads and stores, then do a (aligned) load from the stack slot. - EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. 
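[Editorial sketch, not part of the commit.] The tail of ExpandUnalignedStore above splits a misaligned store into two stores of half width, the second at IncrementSize bytes, writing the high half first on big-endian targets. A plain-C++ sketch of the split for a 32-bit value (memcpy stands in for the two narrower stores; names hypothetical):

#include <cstdint>
#include <cstring>

// Conceptual sketch: expand a misaligned 32-bit store into two 16-bit stores.
void expandUnalignedStore32(uint8_t *ptr, uint32_t value, bool littleEndian) {
  uint16_t lo = static_cast<uint16_t>(value);        // low half of the value
  uint16_t hi = static_cast<uint16_t>(value >> 16);  // high half of the value
  uint16_t first  = littleEndian ? lo : hi;          // half written at the pointer
  uint16_t second = littleEndian ? hi : lo;          // half written at +IncrementSize
  std::memcpy(ptr, &first, 2);                       // Store1
  std::memcpy(ptr + 2, &second, 2);                  // Store2, MinAlign(align, 2)
}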
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); - SmallVector<SDValue, 8> Stores; - SDValue StackPtr = StackBase; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the original location. - SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + Offset, LD->isVolatile(), - LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); - // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, false, false, 0)); - // Increment the pointers. - Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - } + } - // The last copy may be partial. Do an extending load. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, - LD->getSrcValue(), SVOffset + Offset, - MemVT, LD->isVolatile(), - LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. - // On big-endian machines this requires a truncating store to ensure - // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT, false, false, 0)); - - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); - - // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, - NULL, 0, LoadedVT, false, false, 0); - - // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); } + + // The last copy may be partial. Do an extending load. 
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT, + false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT, false, false, 0); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } @@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); // Truncate or zero extend offset to target pointer type. @@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. 
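When a same-width integer load is not legal, the ExpandUnalignedLoad path above mirrors the store side: copy the misaligned value into an aligned stack temporary one register's worth at a time, with a smaller extending load for the tail, then reload it once, aligned, with the original type. A loose C++ analogue of that copy loop, using memcpy for the per-piece accesses; the f64/32-bit-register sizes are just an example:

#include <cstring>

// Emulate an aligned load of a wide type from a possibly misaligned source:
// copy register-width pieces into an aligned buffer, finish with a smaller
// copy for the tail, then perform the real load from the aligned buffer.
double load_f64_unaligned(const unsigned char *src) {
  constexpr unsigned RegBytes = 4;  // pretend integer registers are 32-bit
  alignas(double) unsigned char slot[sizeof(double)];
  unsigned Offset = 0;
  while (Offset + RegBytes < sizeof(double)) {   // all but the last piece
    std::memcpy(slot + Offset, src + Offset, RegBytes);
    Offset += RegBytes;
  }
  std::memcpy(slot + Offset, src + Offset, sizeof(double) - Offset);  // last, possibly partial
  double result;
  std::memcpy(&result, slot, sizeof(double));    // the final, aligned load
  return result;
}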
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, - false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); } @@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Tmp1 = ST->getChain(); SDValue Tmp2 = ST->getBasePtr(); SDValue Tmp3; - int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); - } else if (CFP->getValueType(0) == MVT::f64) { + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); - } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); @@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode - TargetLowering::LegalizeAction Action; + TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; switch (Node->getOpcode()) { case ISD::INTRINSIC_W_CHAIN: @@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. 
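The OptimizeFloatStore hunk a little further up replaces a store of a floating-point constant with a store of its integer bit pattern, falling back to two 32-bit stores for an f64 when single 64-bit stores are not legal. A hedged sketch of the same transformation in ordinary C++; the helper name and the little-endian word order are assumptions for the example:

#include <cstdint>
#include <cstring>

// Store a double constant as raw integer bits: one 64-bit store when
// available, otherwise the low word at offset 0 and the high word at +4
// (swapped on big-endian targets).
void store_f64_constant(void *dst, double c, bool have_i64_stores) {
  uint64_t bits;
  std::memcpy(&bits, &c, sizeof(bits));      // bitcastToAPInt() analogue
  if (have_i64_stores) {
    std::memcpy(dst, &bits, sizeof(bits));   // single 64-bit store
    return;
  }
  uint32_t lo = static_cast<uint32_t>(bits);
  uint32_t hi = static_cast<uint32_t>(bits >> 32);
  auto *p = static_cast<unsigned char *>(dst);
  std::memcpy(p, &lo, sizeof(lo));           // Lo store
  std::memcpy(p + 4, &hi, sizeof(hi));       // Hi store at offset +4
}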
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case ISD::CALLSEQ_START: { + static int depth = 0; SDNode *CallEnd = FindCallEndFromCallStart(Node); // Recursively Legalize all of the inputs of the call end that do not lead @@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1, LastCALLSEQ_END); Tmp1 = LegalizeOp(Tmp1); @@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + + SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ; // Note that we are selecting this call! LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; + depth++; // Legalize the call, starting from the CALLSEQ_END. LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + depth--; + assert(depth >= 0 && "Un-matched CALLSEQ_START?"); + if (depth > 0) + LastCALLSEQ_END = Saved_LastCALLSEQ_END; return Result; } case ISD::CALLSEQ_END: @@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. // Do not try to legalize the target-specific arguments (#1+), except for // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; @@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. - IsLegalizingCall = false; - // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); if (Node->getNumValues() == 2) @@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Change base type to a different vector type. EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); - Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; } @@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { AddLegalizedOperand(SDValue(Node, 0), Tmp3); AddLegalizedOperand(SDValue(Node, 1), Tmp4); return Op.getResNo() ? 
Tmp4 : Tmp3; - } else { - EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); - int SVOffset = LD->getSrcValueOffset(); - unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - - if (SrcWidth != SrcVT.getStoreSizeInBits() && - // Some targets pretend to have an i1 loading operation, and actually - // load an i8. This trick is correct for ZEXTLOAD because the top 7 - // bits are guaranteed to be zero; it helps the optimizers understand - // that these bits are zero. It is also useful for EXTLOAD, since it - // tells the optimizers that those bits are undefined. It would be - // nice to have an effective generic way of getting these benefits... - // Until such a way is found, don't insist on promoting i1 here. - (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { - // Promote to a byte-sized load if not loading an integral number of - // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - unsigned NewWidth = SrcVT.getStoreSizeInBits(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); - SDValue Ch; - - // The extra bits are guaranteed to be zero, since we stored them that - // way. A zext load from NVT thus automatically gives zext from SrcVT. - - ISD::LoadExtType NewExtType = - ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - - Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, - Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, isNonTemporal, Alignment); - - Ch = Result.getValue(1); // The chain. - - if (ExtType == ISD::SEXTLOAD) - // Having the top bits zero doesn't help when sign extending. - Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); - - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); - } else if (SrcWidth & (SrcWidth - 1)) { - // If not loading a power-of-2 number of bits, expand as two loads. - assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Load size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi, Ch; - unsigned IncrementSize; + } - if (TLI.isLittleEndian()) { - // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) - // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, - Tmp1, Tmp2, - LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. 
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. 
+ IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } - // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } } else { - // Big endian - avoid unaligned loads. - // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 - // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. 
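The non-power-of-2 extending-load expansion above (EXTLOAD:i24 becomes a 16-bit load plus an 8-bit load, shifted and ORed together) has a straightforward scalar counterpart. A plain C++ sketch of the little-endian case; the width choices and the function name are illustrative:

#include <cstdint>
#include <cstring>

// Zero-extending load of a 24-bit little-endian value: a 16-bit load of the
// bottom RoundWidth bits plus an 8-bit load of the remaining ExtraWidth bits,
// with the high piece shifted into place and ORed in.
uint32_t load_u24_le(const unsigned char *p) {
  uint16_t lo16;
  std::memcpy(&lo16, p, sizeof(lo16));      // bottom 16 bits
  uint8_t hi8 = p[2];                       // remaining 8 bits at offset +2
  return static_cast<uint32_t>(lo16) |
         (static_cast<uint32_t>(hi8) << 16);
}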
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - - // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } - - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); - } else { - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); - - if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); - } + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); } } - break; - case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + } + break; + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); } - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. 
- assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, - Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); break; } + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; } - - // Since loads produce two values, make sure to remember that we legalized - // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; } case ISD::STORE: { StoreSDNode *ST = cast<StoreSDNode>(Node); Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
- int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case TargetLowering::Promote: assert(VT.isVector() && "Unknown legal promote case!"); - Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getSrcValue(), SVOffset, isVolatile, + ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); break; } @@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, isNonTemporal, - Alignment); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. @@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset + IncrementSize, ExtraVT, isVolatile, - isNonTemporal, + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, isNonTemporal, - Alignment); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset + IncrementSize, ExtraVT, isVolatile, - isNonTemporal, + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, - Alignment); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); break; } } @@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), + false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { + assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + + SDValue Vec = Op.getOperand(0); + SDValue Part = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store the value to a temporary stack slot, then LOAD the returned part. + + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + + // First store the whole vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + false, false, 0); + + // Then store the inserted part. + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); else - return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType(), - false, false, 0); + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + StackPtr); + + // Store the subvector. 
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + MachinePointerInfo(), false, false, 0); + + // Finally, load the updated vector. + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; @@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // element type, only store the bits necessary. if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), EltVT, false, false, 0)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), false, false, 0)); } @@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); if (isTypeLegal(IVT)) { // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { // Store the float to memory, then load the sign part out as an integer. MVT LoadTy = TLI.getPointerTy(); @@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); // Then store the float to it. SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0, + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), false, false, 0); if (TLI.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0); + SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), + false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0); + SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), + false, false, 0); // Move the sign bit to the top bit of the loaded integer. 
unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Align > StackAlign) SP = DAG.getNode(ISD::AND, dl, VT, SP, DAG.getConstant(-(uint64_t)Align, VT)); @@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); int SPFI = StackPtrFI->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, false, SrcAlign); + PtrInfo, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, false, SrcAlign); + PtrInfo, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, - DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, + false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, - false, false, DestAlign); + return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, + PtrInfo, SlotVT, false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), Node->getValueType(0).getVectorElementType(), false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); } @@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } @@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // Splice the libcall in wherever FindInputOutputChains tells us to. 
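The ExpandFCOPYSIGN hunk a little further up reaches the sign of the second operand by reinterpreting it as a same-width integer, or, when no such integer type is legal, by storing the float and loading the word that holds the sign bit. Below is a rough bit-level sketch of what copysign computes, using the same reinterpret-as-integer trick; it is an illustration, not the DAG expansion itself:

#include <cstdint>
#include <cstring>

// copysign(x, y) by bit manipulation: magnitude bits of x, sign bit of y.
double copysign_bits(double x, double y) {
  uint64_t xb, yb;
  std::memcpy(&xb, &x, sizeof(xb));   // BITCAST x to i64
  std::memcpy(&yb, &y, sizeof(yb));   // BITCAST y to i64
  const uint64_t SignBit = UINT64_C(1) << 63;
  uint64_t rb = (xb & ~SignBit) | (yb & SignBit);
  double r;
  std::memcpy(&r, &rb, sizeof(r));
  return r;
}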
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + bool isTailCall = isInTailCallPosition(DAG, Node, TLI); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, + 0, TLI.getLibcallCallingConv(LC), isTailCall, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). @@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); @@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0, + Op0Mapped, Lo, MachinePointerInfo(), false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, - false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, + MachinePointerInfo(), + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, - false, false, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, + MachinePointerInfo(), false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(32, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); - SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); - SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. This implementation has the - // advantage of performing rounding correctly. + // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. 
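The i64-to-f64 branch shown above assembles the result from two doubles whose exponents are pinned at 2^52 and 2^84: ORing the low and high 32-bit halves of the integer into those bit patterns and then subtracting the combined bias reconstructs the value with a single final rounding. A self-contained C++ rendering of that trick; the spelled-out constants are derived here for illustration rather than copied from the patch:

#include <cstdint>
#include <cstring>

// uint64 -> double without a 64-bit signed conversion: plant the halves in
// doubles pinned at 2^52 and 2^84, then let one FP subtract and one FP add
// reassemble (and correctly round) the value.
double u64_to_f64(uint64_t x) {
  uint64_t lo_bits = UINT64_C(0x4330000000000000) | (x & 0xffffffffu); // 2^52 + lo
  uint64_t hi_bits = UINT64_C(0x4530000000000000) | (x >> 32);         // 2^84 + hi * 2^32
  uint64_t bias_bits = UINT64_C(0x4530000000100000);                   // 2^84 + 2^52
  double lo, hi, bias;
  std::memcpy(&lo, &lo_bits, sizeof(lo));     // BITCAST i64 -> f64
  std::memcpy(&hi, &hi_bits, sizeof(hi));
  std::memcpy(&bias, &bias_bits, sizeof(bias));
  return lo + (hi - bias);                    // FSUB then FADD, as in the DAG code
}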
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. + if (!isSigned) { + SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); + + SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); + SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + //pseudo-op, or, even better, for whole-function isel. + SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + } + + // Otherwise, implement the fully general conversion. EVT SHVT = TLI.getShiftAmountTy(); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, @@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + ISD::SETUGE); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0)); - } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); @@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), MVT::f32, false, false, Alignment)); } @@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { } } +/// SplatByte - Distribute ByteVal over NumBits bits. +// FIXME: Move this helper to a common place. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + /// ExpandBitCount - Expand the specified bitcount instruction into operations. 
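The new unsigned i64-to-f32 path above follows the compiler_rt algorithm its comment cites: when the sign bit would confuse a signed conversion, halve the value while folding the shifted-out bit back in so rounding still sees it, convert the halved value as signed, and double the result. The same algorithm as a standalone C++ function; the branch here stands in for the SELECT the DAG builds:

#include <cstdint>

// uint64 -> float using only a signed int64 -> float conversion.
float u64_to_f32(uint64_t x) {
  if ((x >> 63) == 0)                        // top bit clear: plain signed conversion
    return static_cast<float>(static_cast<int64_t>(x));
  uint64_t halved = (x >> 1) | (x & 1);      // halve, keeping the low bit sticky
  float f = static_cast<float>(static_cast<int64_t>(halved));
  return f + f;                              // undo the halving
}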
/// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, switch (Opc) { default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { - static const uint64_t mask[6] = { - 0x5555555555555555ULL, 0x3333333333333333ULL, - 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, - 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL - }; EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(); - unsigned len = VT.getSizeInBits(); - for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { - //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) - unsigned EltSize = VT.isVector() ? - VT.getVectorElementType().getSizeInBits() : len; - SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); - SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); - Op = DAG.getNode(ISD::ADD, dl, VT, - DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), - DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), - Tmp2)); - } + unsigned Len = VT.getSizeInBits(); + + assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && + "CTPOP not implemented for this type."); + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); + SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); + SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); + SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + Op = DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, ShVT)); + return Op; } case ISD::CTLZ: { @@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; @@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), @@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // By default, atomic intrinsics are marked Legal and lowered. 
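The rewritten CTPOP expansion above is the classic parallel bit count from the Bit Twiddling Hacks page cited in the comment, with SplatByte building the 0x55.., 0x33.., 0x0F.. and 0x01.. masks for whatever width is being legalized. For a fixed 64-bit width the same computation looks like this in plain C++:

#include <cstdint>

// Parallel popcount for 64 bits: pairwise sums, then nibble sums, then one
// multiply by 0x0101... gathers the per-byte counts into the top byte.
unsigned popcount64(uint64_t v) {
  v = v - ((v >> 1) & UINT64_C(0x5555555555555555));
  v = (v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333));
  v = (v + (v >> 4)) & UINT64_C(0x0F0F0F0F0F0F0F0F);
  return static_cast<unsigned>((v * UINT64_C(0x0101010101010101)) >> 56);  // Len - 8 = 56
}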
Targets // which don't support them directly, however, may want libcalls, in which // case they mark them Expand, and we get here. - // FIXME: Unimplemented for now. Add libcalls. case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); @@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::FP_ROUND: - case ISD::BIT_CONVERT: + case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); - const uint64_t zero[] = {0, 0}; - APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); @@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.getPointerTy())); VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, - DAG.getConstant(-Align, + DAG.getConstant(-(int64_t)Align, TLI.getPointerTy())); } @@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, - false, false, 0); + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, + MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), false, false, 0)); Results.push_back(Results[0].getValue(1)); break; @@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, - false, false, 0); + Node->getOperand(2), MachinePointerInfo(VS), + false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); break; } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This 
must be an access of the only element. Return it. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_SUBVECTOR: Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; + case ISD::INSERT_SUBVECTOR: + Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); + break; case ISD::CONCAT_VECTORS: { Results.push_back(ExpandVectorBuildThroughStack(Node)); break; @@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else { - // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. - assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() * 2)) && - "Don't know how to expand this operation yet!"); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2))) { EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1)); + } else { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. 
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + + SDValue Ret = ExpandLibCall(LC, Node, isSigned); + BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); + TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, + DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(), MemVT, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { @@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, case ISD::XOR: { unsigned ExtOp, TruncOp; if (OVT.isVector()) { - ExtOp = ISD::BIT_CONVERT; - TruncOp = ISD::BIT_CONVERT; + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; @@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector()) { - ExtOp = ISD::BIT_CONVERT; - TruncOp = ISD::BIT_CONVERT; + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; @@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, cast<ShuffleVectorSDNode>(Node)->getMask(Mask); // Cast the two input vectors. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); // Convert the shuffle mask to the right # elements. 
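The widened-multiply overflow expansion above, including its new libcall fallback, relies on one property: the multiplication overflows exactly when the high half of the double-width product is not just the sign- or zero-extension of the low half. A small C++ illustration for the signed 32-bit case; the function name is made up:

#include <cstdint>

// Overflow-checked 32-bit signed multiply via a widening 64-bit multiply.
// The low half is the result; overflow happened unless the high half equals
// the sign-extension of that low half.
bool smul32_overflow(int32_t a, int32_t b, int32_t &result) {
  int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  int32_t bottom = static_cast<int32_t>(wide);     // low half
  int32_t top = static_cast<int32_t>(wide >> 32);  // high half
  result = bottom;
  int32_t sign_fill = bottom < 0 ? -1 : 0;         // expected high half
  return top != sign_fill;
}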
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 650ee5a0721c..27752123aac4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); @@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { SetSoftenedFloat(SDValue(N, ResNo), R); } -SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) - SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), - NVT); + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size-1); + SDValue Mask = DAG.getConstant(API, NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); } @@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Do a non-extending load followed by FP_EXTEND. 
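In the SoftenFloatRes_FABS hunk above, the mask is now built with APInt::clearBit instead of the removed chaining clear(): an all-ones value with the sign bit cleared, AND-ed with the float's integer bit pattern, is the entire soft-float fabs. A small host-side illustration of the same trick, assuming IEEE-754 single precision (the function name is ours, not LLVM's):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // fabs on the integer bit pattern: Mask = ~(1 << (Size-1)) clears only
    // the sign bit and preserves exponent and mantissa.
    static float soft_fabs(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));        // "bitcast" float -> i32
      const uint32_t mask = ~(UINT32_C(1) << 31);  // all ones, bit 31 cleared
      bits &= mask;
      std::memcpy(&x, &bits, sizeof(bits));        // "bitcast" i32 -> float
      return x;
    }

    int main() {
      assert(soft_fabs(-2.5f) == 2.5f);
      assert(soft_fabs(2.5f) == 2.5f);
      assert(soft_fabs(-0.0f) == 0.0f);
      return 0;
    }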
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), dl, L->getChain(), - L->getBasePtr(), L->getOffset(), - L->getSrcValue(), L->getSrcValueOffset(), + L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to soften this operator's operand!"); - case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; @@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, } } -SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), +SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); } @@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; - case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), LD->isVolatile(), + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. 
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to expand this operator's operand!"); - case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { GetExpandedOp(ST->getValue(), Lo, Hi); return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f8c589071921..f0752df80f12 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; - case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, N->getSrcValue(), N->getAlignment()); + Op2, N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op3 = GetPromotedInteger(N->getOperand(3)); SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getSrcValue(), N->getAlignment()); + Op2, Op3, N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); @@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { case PromoteInteger: if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. - return DAG.getNode(ISD::BIT_CONVERT, dl, - NOutVT, GetPromotedInteger(InOp)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; case SoftenFloat: // Promote the integer operand by hand. @@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, BitConvertToInteger(GetScalarizedVector(InOp))); case SplitVector: { - // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split + // For example, i32 = BITCAST v2i16 on alpha. 
Convert the split // pieces of the input into integers and reassemble in the final type. SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); @@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); - return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case WidenVector: if (OutVT.bitsEq(NInVT)) // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // value was zero. This can be handled by setting the bit just off // the top of the original type. APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.set(OVT.getSizeInBits()); + TopBit.setBit(OVT.getSizeInBits()); Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); return DAG.getNode(ISD::CTTZ, dl, NVT, Op); } @@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), - N->getSrcValue(), N->getSrcValueOffset(), + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getPointerInfo(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT SmallVT = LHS.getValueType(); + + // To determine if the result overflowed in a larger type, we extend the input + // to the larger type, do the multiply, then check the high bits of the result + // to see if the overflow happened. + if (N->getOpcode() == ISD::SMULO) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } + SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + + // Overflow occurred iff the high part of the result does not zero/sign-extend + // the low part. + SDValue Overflow; + if (N->getOpcode() == ISD::UMULO) { + // Unsigned overflow occurred iff the high part is non-zero. + SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, + DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + } else { + // Signed overflow occurred iff the high part does not sign extend the low. + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), + Mul, DAG.getValueType(SmallVT)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); + } + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Overflow); + return Mul; +} + SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { // Zero extend the input. 
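PromoteIntRes_CTTZ, a few hunks up, keeps the same trick while switching from APInt::set to APInt::setBit: after promotion the extra high bits are all zero, so a count-trailing-zeros of an original value of zero would run past the original width; OR-ing in the bit just above that width caps the count without disturbing any lower bit. A host-side sketch of the idea, assuming an i8 value promoted to i32 (the portable loop stands in for the CTTZ node):

    #include <cassert>
    #include <cstdint>

    // cttz of an i8 value evaluated in the promoted i32 type.  The i8
    // semantics want cttz(0) == 8, so set bit 8 (just above the original
    // width) before counting.
    static unsigned cttz8_promoted(uint8_t v) {
      uint32_t wide = (uint32_t)v | (UINT32_C(1) << 8);  // TopBit.setBit(8)
      unsigned n = 0;
      while ((wide & 1) == 0) { wide >>= 1; ++n; }       // always terminates
      return n;
    }

    int main() {
      assert(cttz8_promoted(0x01) == 0);
      assert(cttz8_promoted(0x28) == 3);  // 0b101000
      assert(cttz8_promoted(0x00) == 8);  // capped by the inserted bit
      return 0;
    }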
SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); @@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { - assert(ResNo == 1 && "Only boolean result promotion currently supported!"); - return PromoteIntRes_Overflow(N); -} - //===----------------------------------------------------------------------===// // Integer Operand Promotion //===----------------------------------------------------------------------===// @@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; - case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; @@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); } -SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { // This should only occur in unusual situations like bitcasting to an // x86_fp80, so just turn it into a store+load return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); @@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), - SVOffset, N->getMemoryVT(), + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), + N->getMemoryVT(), isVolatile, isNonTemporal, Alignment); } @@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); + SplitInteger(Tmp.first, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Tmp.second); + break; + } + case ISD::AND: case ISD::OR: case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; @@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); } +/// Lower an atomic node to the appropriate builtin call. 
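The ISD::ATOMIC_* result-expansion cases added above hand the node to ExpandAtomic, defined next, which picks a runtime routine from the opcode and the memory width and calls it through ExpandChainLibCall. The RTLIB::SYNC_* entries follow the usual __sync_<operation>_<size> naming convention; purely as an illustration of that mapping (the strings below are the conventional names, not values read out of the RTLIB table):

    #include <cassert>
    #include <cstdio>
    #include <string>

    // Build the __sync_* symbol name selected for an atomic operation and an
    // access width of 1, 2, 4 or 8 bytes, e.g. ("fetch_and_add", 4) ->
    // "__sync_fetch_and_add_4".
    static std::string syncLibcallName(const std::string &op, unsigned bytes) {
      char buf[64];
      std::snprintf(buf, sizeof(buf), "__sync_%s_%u", op.c_str(), bytes);
      return std::string(buf);
    }

    int main() {
      assert(syncLibcallName("fetch_and_add", 4) == "__sync_fetch_and_add_4");
      assert(syncLibcallName("lock_test_and_set", 8) ==
             "__sync_lock_test_and_set_8");
      assert(syncLibcallName("val_compare_and_swap", 2) ==
             "__sync_val_compare_and_swap_2");
      return 0;
    }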
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + /// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, /// and the shift amount is a constant 'Amt'. Expand the operation. 
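ExpandShiftByConstant, whose body follows, rewrites a shift of the expanded (double-width) value as shifts and ORs on its two halves, with special cases when the amount reaches or exceeds the half width or the full width, plus the X+X form for shifting left by 1 when ADDC/ADDE are available. The general case for SHL and SRL, sketched with 32-bit halves (helper names are ours; amt must stay strictly between 0 and 32 so no single shift is by the full width):

    #include <cassert>
    #include <cstdint>

    // Shift a 64-bit value held as (hi, lo) 32-bit halves left by a constant
    // 0 < amt < 32:  Hi = (InH << Amt) | (InL >> (N-Amt)),  Lo = InL << Amt.
    static void shl64(uint32_t &hi, uint32_t &lo, unsigned amt) {
      hi = (hi << amt) | (lo >> (32 - amt));
      lo = lo << amt;
    }

    // Logical right shift:  Lo = (InL >> Amt) | (InH << (N-Amt)),  Hi = InH >> Amt.
    static void srl64(uint32_t &hi, uint32_t &lo, unsigned amt) {
      lo = (lo >> amt) | (hi << (32 - amt));
      hi = hi >> amt;
    }

    int main() {
      uint32_t hi = 0x00000001, lo = 0x80000000;  // the value 0x0000000180000000
      shl64(hi, lo, 4);
      assert(hi == 0x00000018 && lo == 0x00000000);
      srl64(hi, lo, 4);                            // shift back to the original
      assert(hi == 0x00000001 && lo == 0x80000000);
      return 0;
    }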
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Lo = Hi = DAG.getConstant(0, NVT); } else if (Amt > NVTBits) { Lo = DAG.getConstant(0, NVT); - Hi = DAG.getNode(ISD::SHL, dl, - NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getNode(ISD::SHL, DL, + NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, NVT); Hi = InL; @@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, TLI.isOperationLegalOrCustom(ISD::ADDC, TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. - SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); } else { - Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); - Hi = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SHL, dl, NVT, InH, + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(NVTBits-Amt, ShTy))); } return; @@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Lo = DAG.getConstant(0, NVT); Hi = DAG.getConstant(0, NVT); } else if (Amt > NVTBits) { - Lo = DAG.getNode(ISD::SRL, dl, + Lo = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); Hi = DAG.getConstant(0, NVT); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getConstant(0, NVT); } else { - Lo = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); } return; } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt > VTBits) { - Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else if (Amt > NVTBits) { - Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt-NVTBits, ShTy)); - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else if (Amt == NVTBits) { Lo = InH; - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else { - Lo = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getNode(ISD::SHL, DL, NVT, InH, 
DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); } } @@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support // them. TODO: Teach operation legalization how to expand unsupported // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate - // a carry of type MVT::Flag, but there doesn't seem to be any way to + // a carry of type MVT::Glue, but there doesn't seem to be any way to // generate a value of this type in the expanded code sequence. bool hasCarry = TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? @@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasCarry) { - SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); HiOps[2] = Lo.getValue(1); @@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, HiOps[2] = Lo.getValue(1); Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { - if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); - SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], - ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], - ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); - } else { - Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); - SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), - LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); - } + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDVTList VTList = 
DAG.getVTList(LHSL.getValueType(), MVT::Glue); SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); - Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); } @@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = @@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, NEVT, + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. - Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. 
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(Ret, Lo, Hi); + + // Now calculate overflow. + SDValue Ofl; + if (N->getOpcode() == ISD::UMULO) + Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, + DAG.getConstant(0, VT), ISD::SETNE); + else { + SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); + Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); + Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); + } + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to expand this operator's operand!"); - case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); - return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, Alignment); - } else if (TLI.isLittleEndian()) { + } + + if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = @@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, NEVT, - isVolatile, isNonTemporal, + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - } else { - // Big-endian - high bits are at low addresses. Favor aligned stores at - // the cost of some bit-fiddling. - GetExpandedInteger(N->getValue(), Lo, Hi); - - EVT ExtVT = N->getMemoryVT(); - unsigned EBytes = ExtVT.getStoreSize(); - unsigned IncrementSize = NVT.getSizeInBits()/8; - unsigned ExcessBits = (EBytes - IncrementSize)*8; - EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), - ExtVT.getSizeInBits() - ExcessBits); + } - if (ExcessBits < NVT.getSizeInBits()) { - // Transfer high bits from the top of Lo to the bottom of Hi. - Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, - TLI.getPointerTy())); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, - TLI.getPointerTy()))); - } + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } - // Store both the high bits and maybe some of the low bits. - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, isNonTemporal, - Alignment); + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), + HiVT, isVolatile, isNonTemporal, Alignment); - // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - // Store the lowest ExcessBits bits in the second half. - Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, - EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - } + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. 
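The big-endian branch of ExpandIntOp_STORE, un-nested above, favors an aligned full-width store at the cost of the advertised bit-fiddling: when the memory type is not a multiple of the expanded width, the top of Lo is shifted into the bottom of Hi, that full word is stored first, and only the lowest ExcessBits go out at the incremented pointer. A worked sketch for an i48 value expanded into i32 halves (the widths and helper name are chosen for illustration):

    #include <cassert>
    #include <cstdint>

    // Big-endian expanded store of an i48 held as Lo = bits [31:0] and
    // Hi = bits [47:32].  ExcessBits = (6 - 4) * 8 = 16, so the stored words
    // are bits [47:16] (4 bytes at Ptr) and bits [15:0] (2 bytes at Ptr+4).
    static void store_i48_be(uint64_t v, uint32_t &word0, uint16_t &word1) {
      uint32_t lo = (uint32_t)v;          // bits [31:0]
      uint32_t hi = (uint32_t)(v >> 32);  // bits [47:32] in its low half
      hi = (hi << 16) | (lo >> 16);       // transfer top of Lo into bottom of Hi
      word0 = hi;                         // truncating store of HiVT = i32
      word1 = (uint16_t)lo;               // lowest ExcessBits in the second half
    }

    int main() {
      uint32_t w0; uint16_t w1;
      store_i48_be(0x0000123456789ABCULL, w0, w1);
      assert(w0 == 0x12345678u);  // bits [47:16]
      assert(w1 == 0x9ABCu);      // bits [15:0]
      return 0;
    }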
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { @@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), - FudgePtr, NULL, 0, MVT::f32, + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6e56c98e9b56..cedda7e7075a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { if (M->getNodeId() == Processed) RemapValue(NewVal); DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // OldVal may be a target of the ReplacedValues map which was marked + // NewNode to force reanalysis because it was updated. Ensure that + // anything that ReplacedValues mapped to OldVal will now be mapped + // all the way to NewVal. + ReplacedValues[OldVal] = NewVal; } // The original node continues to exist in the DAG, marked NewNode. } @@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { /// BitConvertToInteger - Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); - return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); - return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } @@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), + false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + return CallInfo; +} + /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d56029208e61..3f81bbbe4061 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -99,7 +99,7 @@ private: return SoftenFloat; return ExpandFloat; } - + if (VT.getVectorNumElements() == 1) return ScalarizeVector; return SplitVector; @@ -192,6 +192,10 @@ private: SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); @@ -244,7 +248,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); - SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); @@ -278,7 +282,7 @@ private: // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); - SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); @@ -344,6 +348,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -352,7 +357,7 @@ private: // Integer Operand Expansion. 
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); @@ -387,7 +392,7 @@ private: // Result Float to Integer Conversion. void SoftenFloatResult(SDNode *N, unsigned OpNo); - SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); @@ -426,7 +431,7 @@ private: // Operand Float to Integer Conversion. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); - SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); @@ -515,7 +520,7 @@ private: SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); - SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -532,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); - SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -557,7 +562,7 @@ private: void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -577,11 +582,12 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); - SDValue SplitVecOp_BIT_CONVERT(SDNode *N); + SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_FP_ROUND(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -603,7 +609,7 @@ private: // Widen Vector Result Promotion. void WidenVectorResult(SDNode *N, unsigned ResNo); - SDValue WidenVecRes_BIT_CONVERT(SDNode* N); + SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); @@ -628,7 +634,7 @@ private: // Widen Vector Operand. bool WidenVectorOperand(SDNode *N, unsigned ResNo); - SDValue WidenVecOp_BIT_CONVERT(SDNode *N); + SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -721,7 +727,7 @@ private: } // Generic Result Expansion. 
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -729,7 +735,7 @@ private: void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); // Generic Operand Expansion. - SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BITCAST (SDNode *N); SDValue ExpandOp_BUILD_VECTOR (SDNode *N); SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 9c2b1d9ed73d..a75ae87f3cbe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -32,8 +32,7 @@ using namespace llvm; // little/big-endian machines, followed by the Hi/Lo part. This means that // they cannot be used as is on vectors, for which Lo is always stored first. -void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); @@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, case SoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case ExpandInteger: case ExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case SplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case ScalarizeVector: // Convert the element instead. 
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case WidenVector: { - assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); @@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; } } if (InVT.isVector() && OutVT.isInteger()) { - // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand + // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { - SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); + SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, @@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); // Load the second half from the stack slot. - Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. 
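When none of the in-register cases apply, ExpandRes_BITCAST (in the hunk above) goes through memory: store the operand to a stack temporary described by a MachinePointerInfo, load the first half at offset 0, then load the second half at IncrementSize. The same through-memory bitcast written as host C++, splitting an f64 into two 32-bit words (the types and names are only an example):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // "Bitcast" a double to two 32-bit halves via a local buffer, the way the
    // legalizer uses a stack slot when the value cannot be split in registers.
    static void split_f64(double d, uint32_t &w0, uint32_t &w1) {
      uint8_t slot[8];
      std::memcpy(slot, &d, 8);       // store to the "stack slot"
      std::memcpy(&w0, slot, 4);      // load the first half (offset 0)
      std::memcpy(&w1, slot + 4, 4);  // load the second half (offset 4)
    }

    int main() {
      uint32_t w0, w1;
      split_f64(1.0, w0, w1);
      // Reassemble the halves and check we get the original bits back; which
      // half holds the exponent depends on the host's endianness.
      uint8_t slot[8];
      std::memcpy(slot, &w0, 4);
      std::memcpy(slot + 4, &w1, 4);
      uint64_t bits, expect;
      std::memcpy(&bits, slot, 8);
      double d = 1.0;
      std::memcpy(&expect, &d, 8);
      assert(bits == expect);
      return 0;
    }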
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT OldVT = N->getValueType(0); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), OldVec); @@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset+IncrementSize, + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// -SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->getValueType(0).isVector()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on - // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. + // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), @@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { std::swap(Parts[0], Parts[1]); SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } } @@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. - return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); } SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { @@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back. 
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, NewVecVT, N->getOperand(0)); SDValue Lo, Hi; @@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. - return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); } SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { @@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); - int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); @@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); - Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), - SVOffset + IncrementSize, + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 621c08724210..167dbe0377b3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); else Operands[j] = Op.getOperand(j); } Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); + return DAG.getNode(ISD::BITCAST, dl, VT, Op); } SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 93bc2d04928e..182f8fcbfbf3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to scalarize the result of this operator!"); - case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; @@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT 
= N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NewVT, N->getOperand(0)); } @@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), - N->getSrcValue(), N->getSrcValueOffset(), + N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->getOriginalAlignment()); @@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"; #endif llvm_unreachable("Do not know how to scalarize this operator's operand!"); - case ISD::BIT_CONVERT: - Res = ScalarizeVecOp_BIT_CONVERT(N); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { return false; } -/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs /// to be scalarized, it must be <1 x ty>. Convert the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Elt); } @@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ if (N->isTruncatingStore()) return DAG.getTruncStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), - N->getSrcValue(), N->getSrcValueOffset(), + N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), + N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), N->getOriginalAlignment()); } @@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; @@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); } -void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; @@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, GetExpandedOp(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } break; @@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } @@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, @@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0)); VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); break; } } @@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - EVT IdxVT = Idx.getValueType(); DebugLoc dl = N->getDebugLoc(); EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); - Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, - DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. @@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); // Load the Lo part from the stack slot. 
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), false, false, 0); // Increment the pointer to the other part. @@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - false, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Ch = LD->getChain(); SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); - const Value *SV = LD->getSrcValue(); - int SVOffset = LD->getSrcValueOffset(); EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); @@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); + LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, + Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); + LD->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to split this operator's operand!"); - case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::FP_EXTEND: + case ISD::FTRUNC: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } -SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { - // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { + // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will // end up being split all the way down to individual components. Convert the // split pieces into integers and reassemble. 
SDValue Lo, Hi; @@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), JoinIntegers(Lo, Hi)); } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { - // We know that the extracted result type is legal. For now, assume the index - // is a constant. + // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); @@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { EVT EltVT = VecVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, - SV, 0, EltVT, false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo(), EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); @@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) - Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, isVol, isNT, Alignment); else - Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), isVol, isNT, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVol, isNT, Alignment); else - Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); @@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - + // The input operands all must have the same type, and we know the result the // result type is valid. Convert this to a buildvector which extracts all the // input elements. 
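The common thread in the LegalizeVectorTypes hunks above is that split loads and stores now describe their memory operands with MachinePointerInfo, deriving the high half's info with getWithOffset() instead of hand-maintaining an SVOffset integer. Below is a condensed sketch of the lo/hi store split using only calls that appear in the patch; it is not a standalone program and assumes the locals (Ch, Ptr, Lo, Hi, LoMemVT, DL, Alignment, isVol, isNT, and the StoreSDNode *N) are set up as in the SplitVecOp_STORE hunk.

    // Low half keeps the node's original pointer info.
    unsigned IncrementSize = LoMemVT.getSizeInBits() / 8;
    SDValue LoStore = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
                                   isVol, isNT, Alignment);

    // High half: advance the pointer and derive the pointer info by offset,
    // rather than tracking a separate SVOffset variable alongside it.
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                      DAG.getIntPtrConstant(IncrementSize));
    SDValue HiStore = DAG.getStore(Ch, DL, Hi, Ptr,
                                   N->getPointerInfo().getWithOffset(IncrementSize),
                                   isVol, isNT, MinAlign(Alignment, IncrementSize));

    // Chain the two halves so later users depend on both stores.
    SDValue Result = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);

Keeping the offset inside MachinePointerInfo rather than in an ad-hoc integer lets later passes see a consistent description of which part of the original memory location each half touches.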
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc DL = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + + //===----------------------------------------------------------------------===// // Result Vector Widening @@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen the result of this operator!"); - case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; @@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { + while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); } - + // No legal vector version so unroll the vector operation and then widen. if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); - + // Since the operation can trap, apply operation on the original vector. EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); @@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned ConcatEnd = 0; // Current ConcatOps index. int Idx = 0; // Current Idx into input vectors. - // NumElts := greatest synthesizable vector size (at most WidenVT) + // NumElts := greatest legal vector size (at most WidenVT) // while (orig. 
vector has unhandled elements) { // take munches of size NumElts from the beginning and add to ConcatOps // NumElts := next smaller supported vector size or 1 @@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); @@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { do { NextSize *= 2; NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeSynthesizable(NextVT)); + } while (!TLI.isTypeLegal(NextVT)); if (!VT.isVector()) { // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT @@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (VT == WidenVT) return ConcatOps[0]; } - + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { @@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getNode(Opcode, dl, WidenVT, InOp); + if (InVTNumElts == WidenNumElts) { + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InOp); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + } } - if (TLI.isTypeSynthesizable(InWidenVT)) { + if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(Opcode, dl, WidenVT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, - &Ops[0], NumConcat)); + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, + &Ops[0], NumConcat); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVec); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); } if (InVTNumElts % WidenNumElts == 0) { + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, + InOp, DAG.getIntPtrConstant(0)); // Extract the input and convert the shorten input vector. 
- return DAG.getNode(Opcode, dl, WidenVT, - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, - InOp, DAG.getIntPtrConstant(0))); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVal); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); } } @@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; - for (i=0; i < MinElts; ++i) - Ops[i] = DAG.getNode(Opcode, dl, EltVT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + for (i=0; i < MinElts; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + if (N->getNumOperands() == 1) + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); + else + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } -SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT VT = N->getValueType(0); @@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { InOp = GetPromotedInteger(InOp); InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case SoftenFloat: case ExpandInteger: @@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) // The input widens to the same size. Convert to the widen value. - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; } unsigned WidenSize = WidenVT.getSizeInBits(); unsigned InSize = InVT.getSizeInBits(); - if (WidenSize % InSize == 0) { + // x86mmx is not an acceptable vector element type, so don't try. + if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { // Determine new input vector type. The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. 
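The WidenVecRes_Convert hunks above repeat one pattern: after widening or extracting the input, the conversion node is rebuilt with either one operand or two, so that conversions such as FP_ROUND keep their extra constant operand. A small illustrative helper capturing that dispatch; the name RebuildConvert is hypothetical, and only the DAG.getNode calls come from the patch.

    // Rebuild a conversion with a new (widened or extracted) input operand,
    // forwarding the optional second operand (e.g. FP_ROUND's truncation flag).
    static SDValue RebuildConvert(SelectionDAG &DAG, SDNode *N, DebugLoc DL,
                                  EVT DestVT, SDValue NewInput) {
      unsigned Opcode = N->getOpcode();
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, DestVT, NewInput);
      return DAG.getNode(Opcode, DL, DestVT, NewInput, N->getOperand(1));
    }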
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeSynthesizable(NewInVT)) { + if (TLI.isTypeLegal(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { else NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, &Ops[0], NewNumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeSynthesizable(InWidenVT)) { + if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT InVT = InOp.getValueType(); - ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); - if (CIdx) { - unsigned IdxVal = CIdx->getZExtValue(); - // Check if we can just return the input vector after widening. - if (IdxVal == 0 && InVT == WidenVT) - return InOp; - - // Check if we can extract from the vector. - unsigned InNumElts = InVT.getVectorNumElements(); - if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); - } + // Check if we can just return the input vector after widening. + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = Idx.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; - if (CIdx) { - unsigned IdxVal = CIdx->getZExtValue(); - for (i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, IdxVT)); - } else { - Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); - for (i=1; i < NumElts; ++i) { - SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(i, IdxVT)); - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); - } - } + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(IdxVal+i)); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen this operator's operand!"); - case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; @@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } -SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT InWidenVT = InOp.getValueType(); @@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { // Check if we can convert between two legal vector types and extract. unsigned InWidenSize = InWidenVT.getSizeInBits(); unsigned Size = VT.getSizeInBits(); - if (InWidenSize % Size == 0 && !VT.isVector()) { + // x86mmx is not an acceptable vector element type, so don't try. 
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeSynthesizable(NewVT)) { - SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); } @@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (Width == WidenEltWidth) return RetVT; - // See if there is larger legal integer than the element type to load/store + // See if there is larger legal integer than the element type to load/store unsigned VT; for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { @@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, if (NewLdTy != LdTy) { NumElts = Width / NewLdTy.getSizeInBits(); NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); // Readjust position and vector position based on new load type Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; @@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], DAG.getIntPtrConstant(Idx++)); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp); + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD) { +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, + LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. 
// The routines chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector @@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const Value *SV = LD->getSrcValue(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); @@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } if (NewVT == WidenVT) return LdOp; @@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, NewVTWidth = NewVT.getSizeInBits(); } - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, - SVOffset+Offset, isVolatile, + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); LdOps.push_back(LdOp); @@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, if (!LdOps[0].getValueType().isVector()) // All the loads are scalar loads. return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); - + // If the load contains vectors, build the vector using concat vector. // All of the vectors used to loads are power of 2 and the scalars load // can be combined to make a power of 2 vector. 
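To make the load-widening strategy above concrete: suppose a 96-bit vector load must be widened to a 128-bit result and the widest legal pieces FindMemType can return are 64 and 32 bits. In this hypothetical case (the widths are illustrative, not taken from the patch), GenWidenVectorLoads would emit two chained loads whose pointer info is offset-derived, roughly:

    // First piece: 64 bits at offset 0, described by the node's own info.
    SDValue Ld0 = DAG.getLoad(MVT::i64, dl, Chain, BasePtr, LD->getPointerInfo(),
                              isVolatile, isNonTemporal, Align);

    // Second piece: 32 bits at byte offset 8; both the address and the
    // MachinePointerInfo are advanced by the same amount.
    SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                               DAG.getIntPtrConstant(8));
    SDValue Ld1 = DAG.getLoad(MVT::i32, dl, Chain, Ptr8,
                              LD->getPointerInfo().getWithOffset(8),
                              isVolatile, isNonTemporal, MinAlign(Align, 8));

    // LdChain collects Ld0.getValue(1) and Ld1.getValue(1) so the pieces stay
    // ordered, and the loaded values are then recombined into the widened vector.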
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const Value *SV = LD->getSrcValue(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, + LD->getPointerInfo(), LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, - isNonTemporal, Align); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); } @@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getIntPtrConstant(Idx)); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - isNonTemporal, + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; @@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // Cast the vector to the scalar type we can store unsigned NumElts = ValWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); - SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - isNonTemporal, MinAlign(Align, Offset))); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, // and then store it. 
Instead, we extract each element and then store it. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); - + EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); @@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, - SVOffset, StEltVT, + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { @@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, BasePtr, DAG.getIntPtrConstant(Offset)); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, - SVOffset + Offset, StEltVT, - isVolatile, isNonTemporal, + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, + ST->getPointerInfo().getWithOffset(Offset), + StEltVT, isVolatile, isNonTemporal, MinAlign(Align, Offset))); } } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ac2d33884b26..2dcb22957325 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index fae27294e364..e3da2084529a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { - if (SU->getNode()->getFlaggedNode()) + if (SU->getNode()->getGluedNode()) return NULL; SDNode *N = SU->getNode(); @@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; else if (VT == MVT::Other) TryUnfold = true; @@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; } @@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } } - for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { if (Node->getOpcode() == ISD::INLINEASM) { // Inline asm can clobber physical defs. 
unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 56f5ded50083..430283d5eff9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler tdListDAGScheduler("list-td", "Top-down list scheduler", createTDListDAGScheduler); - + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGList - The actual list scheduler implementation. This supports @@ -51,7 +51,7 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. /// SchedulingPriorityQueue *AvailableQueue; - + /// PendingQueue - This contains all of the instructions whose operands have /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands become available, the instruction is @@ -63,11 +63,12 @@ private: public: ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue, - ScheduleHazardRecognizer *HR) - : ScheduleDAGSDNodes(mf), - AvailableQueue(availqueue), HazardRec(HR) { - } + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGList() { delete HazardRec; @@ -87,14 +88,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); - + // Build the scheduling graph. BuildSchedGraph(NULL); AvailableQueue->initNodes(SUnits); - + ListScheduleTopDown(); - + AvailableQueue->releaseState(); } @@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); - + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) @@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - + Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); @@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() { SUnits[i].isAvailable = true; } } - + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; @@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } - + // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. 
if (AvailableQueue->empty()) { @@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() { } SUnit *FoundSUnit = 0; - + bool HasNoopHazards = false; while (!AvailableQueue->empty()) { SUnit *CurSUnit = AvailableQueue->pop(); - + ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; } - + // Remember if this is a noop hazard. HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; - + NotReady.push_back(CurSUnit); } - + // Add the nodes that aren't ready back onto the available list. if (!NotReady.empty()) { AvailableQueue->push_all(NotReady); @@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If this is a pseudo-op node, we don't want to increment the current // cycle. if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; + ++CurCycle; } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, - new LatencyPriorityQueue(), - IS->CreateTargetHazardRecognizer()); + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 4c3e4e3b0768..0b548b277f4c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@ #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,10 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); +static cl::opt<bool> DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(false), + cl::desc("Disable cycle-level precision during preRA scheduling")); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,31 +88,56 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// CurCycle - The current scheduler state corresponds to this cycle. + unsigned CurCycle; + + /// MinAvailableCycle - Cycle of the soonest available instruction. 
+ unsigned MinAvailableCycle; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. unsigned NumLiveRegs; std::vector<SUnit*> LiveRegDefs; - std::vector<unsigned> LiveRegCycles; + std::vector<SUnit*> LiveRegGens; /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; public: - ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, bool needlatency, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), - AvailableQueue(availqueue), Topo(SUnits) { - } + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits) { + + const TargetMachine &tm = mf.getTarget(); + if (DisableSchedCycles || !NeedLatency) + HazardRec = new ScheduleHazardRecognizer(); + else + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGRRList() { + delete HazardRec; delete AvailableQueue; } void Schedule(); + ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } + /// IsReachable - Checks if SU is reachable from TargetSU. bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { return Topo.IsReachable(SU, TargetSU); @@ -136,24 +166,37 @@ public: } private: + bool isReady(SUnit *SU) { + return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || + AvailableQueue->isReady(SU); + } + void ReleasePred(SUnit *SU, const SDep *PredEdge); - void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ReleasePredecessors(SUnit *SU); void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); + void ReleasePending(); + void AdvanceToCycle(unsigned NextCycle); + void AdvancePastStalls(SUnit *SU); + void EmitNode(SUnit *SU); + void ScheduleNodeBottomUp(SUnit*); void CapturePred(SDep *PredEdge); - void ScheduleNodeBottomUp(SUnit*, unsigned); - void ScheduleNodeTopDown(SUnit*, unsigned); void UnscheduleNodeBottomUp(SUnit*); - void BacktrackBottomUp(SUnit*, unsigned, unsigned&); + void RestoreHazardCheckerBottomUp(); + void BacktrackBottomUp(SUnit*, SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); - void ListScheduleTopDown(); + + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); + void ScheduleNodeTopDown(SUnit*); + void ListScheduleTopDown(); + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. @@ -190,11 +233,13 @@ private: void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() - << " **********\n"); + << " '" << BB->getName() << "' **********\n"); + CurCycle = 0; + MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegCycles.resize(TRI->getNumRegs(), 0); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegGens.resize(TRI->getNumRegs(), NULL); // Build the scheduling graph. 
BuildSchedGraph(NULL); @@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() { Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); - + + HazardRec->Reset(); + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. if (isBottomUp) ListScheduleBottomUp(); else ListScheduleTopDown(); - + AvailableQueue->releaseState(); } @@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { PredSU->isAvailable = true; - AvailableQueue->push(PredSU); + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(SU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } } } -void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { +/// Call ReleasePred for each predecessor, then update register live def/gen. +/// Always update LiveRegDefs for a register dependence even if the current SU +/// also defines the register. This effectively create one large live range +/// across a sequence of two-address node. This is important because the +/// entire chain must be scheduled together. Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { // Bottom up: release predecessors for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - if (!LiveRegDefs[I->getReg()]) { + SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + "interference on register dependence"); + LiveRegDefs[I->getReg()] = I->getSUnit(); + if (!LiveRegGens[I->getReg()]) { ++NumLiveRegs; - LiveRegDefs[I->getReg()] = I->getSUnit(); - LiveRegCycles[I->getReg()] = CurCycle; + LiveRegGens[I->getReg()] = SU; } } } } +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + if (DisableSchedCycles) { + assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); + return; + } + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? 
PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + AvailableQueue->setCurCycle(NextCycle); + if (!HazardRec->isEnabled()) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (DisableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (isBottomUp && SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + if (!HazardRec->isEnabled()) + return; + + // Check for phys reg copy. + if (!SU->getNode()) + return; + + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (isBottomUp && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); + + if (!isBottomUp && SU->isCall) { + HazardRec->Reset(); + } +} + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. 
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); #endif - // FIXME: Handle noop hazard. + // FIXME: Do not modify node height. It may interfere with + // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the + // node it's ready cycle can aid heuristics, and after scheduling it can + // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); + + // Reserve resources for the scheduled intruction. + EmitNode(SU); + Sequence.push_back(SU); AvailableQueue->ScheduledNode(SU); - ReleasePredecessors(SU, CurCycle); + // Update liveness of predecessors before successors to avoid treating a + // two-address node as a live range def. + ReleasePredecessors(SU); // Release all the implicit physical register defs that are live. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { - assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - assert(LiveRegDefs[I->getReg()] == SU && - "Physical register dependency violated?"); - --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegCycles[I->getReg()] = 0; - } + // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegGens[I->getReg()] = NULL; } } SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. + // + // If HazardRec is disabled, count each inst as one cycle. + if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() + || AvailableQueue->empty()) + AdvanceToCycle(CurCycle + 1); } /// CapturePred - This does the opposite of ReleasePred. Since SU is being /// unscheduled, incrcease the succ left count of its predecessors. Remove /// them from AvailableQueue if necessary. -void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); if (PredSU->isAvailable) { PredSU->isAvailable = false; @@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ + if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; - LiveRegCycles[I->getReg()] = 0; + LiveRegGens[I->getReg()] = NULL; } } for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + // This becomes the nearest def. Note that an earlier def may still be + // pending if this is a two-address node. 
+ LiveRegDefs[I->getReg()] = SU; if (!LiveRegDefs[I->getReg()]) { - LiveRegDefs[I->getReg()] = SU; ++NumLiveRegs; } - if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) - LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); + if (LiveRegGens[I->getReg()] == NULL || + I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) + LiveRegGens[I->getReg()] = I->getSUnit(); } } + if (SU->getHeight() < MinAvailableCycle) + MinAvailableCycle = SU->getHeight(); SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; - AvailableQueue->push(SU); + if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { + // Don't make available until backtracking is complete. + SU->isPending = true; + PendingQueue.push_back(SU); + } + else { + AvailableQueue->push(SU); + } AvailableQueue->UnscheduledNode(SU); } +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { + HazardRec->Reset(); + + unsigned LookAhead = std::min((unsigned)Sequence.size(), + HazardRec->getMaxLookAhead()); + if (LookAhead == 0) + return; + + std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + unsigned HazardCycle = (*I)->getHeight(); + for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { + SUnit *SU = *I; + for (; SU->getHeight() > HazardCycle; ++HazardCycle) { + HazardRec->RecedeCycle(); + } + EmitNode(SU); + } +} + /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BTCycle in order to schedule a specific node. -void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, - unsigned &CurCycle) { - SUnit *OldSU = NULL; - while (CurCycle > BtCycle) { - OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { Sequence.pop_back(); if (SU->isSucc(OldSU)) // Don't try to remove SU from AvailableQueue. SU->isAvailable = false; + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); - --CurCycle; AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); + RestoreHazardCheckerBottomUp(); + + ReleasePending(); + ++NumBacktracks; } static bool isOperandOf(const SUnit *SU, SDNode *N) { for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getFlaggedNode()) { + SUNode = SUNode->getGluedNode()) { if (SUNode->isOperandOf(N)) return true; } @@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. 
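BacktrackBottomUp now unwinds the emitted Sequence node by node until it reaches the target BtSU, and RestoreHazardCheckerBottomUp afterwards rebuilds the hazard recognizer by replaying the tail of the sequence. The unwind loop, reduced to a standalone shape with placeholder Unit and unschedule():

    #include <cassert>
    #include <vector>

    struct Unit { bool Scheduled = true; };

    void unschedule(Unit *U) { U->Scheduled = false; } // stand-in side effect

    void backtrackTo(std::vector<Unit*> &Sequence, Unit *BtSU) {
      while (true) {
        assert(!Sequence.empty() && "backtrack target was not in the sequence");
        Unit *OldSU = Sequence.back();
        Sequence.pop_back();
        unschedule(OldSU); // undo liveness/pressure updates for OldSU
        if (OldSU == BtSU)
          break;           // BtSU itself is now unscheduled and can be reordered
      }
    }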
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { - if (SU->getNode()->getFlaggedNode()) - return NULL; - SDNode *N = SU->getNode(); if (!N) return NULL; + if (SU->getNode()->getGluedNode()) + return NULL; + SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; else if (VT == MVT::Other) TryUnfold = true; @@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; } @@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = CreateNewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); ComputeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != TID.getNumOperands(); ++i) { if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { @@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); ComputeLatency(NewSU); // Record all the edges to and from the old SU, by category. @@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { RemovePred(SuccDep, D); D.setSUnit(NewSU); AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; } for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { SDep D = ChainSuccs[i]; @@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } // Add a data dependency to reflect that NewSU reads the value defined // by LoadSU. @@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. -static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - bool Added = false; - if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { - if (RegAdded.insert(Reg)) { + for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + + // Check if Ref is live. + if (!LiveRegDefs[Reg]) continue; + + // Allow multiple uses of the same def. + if (LiveRegDefs[Reg] == SU) continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(Reg)) LRegs.push_back(Reg); - Added = true; - } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { - if (RegAdded.insert(*Alias)) { - LRegs.push_back(*Alias); - Added = true; - } - } - return Added; } /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. 
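The rewritten CheckForLiveRegDef walks TRI->getOverlaps(Reg), i.e. the register together with every register aliasing it, when collecting interfering live registers. A self-contained sketch of that interference test with a made-up overlap table standing in for TargetRegisterInfo:

    #include <set>
    #include <vector>

    struct Unit {};

    // Overlaps[R] lists R itself followed by every register aliasing R,
    // e.g. a 16-bit register together with its 8-bit and 32-bit forms.
    using OverlapTable = std::vector<std::vector<unsigned>>;

    void checkForLiveRegDef(const Unit *SU, unsigned Reg,
                            const std::vector<const Unit*> &LiveRegDefs,
                            const OverlapTable &Overlaps,
                            std::set<unsigned> &RegAdded,
                            std::vector<unsigned> &LRegs) {
      for (unsigned R : Overlaps[Reg]) {
        if (!LiveRegDefs[R]) continue;      // not live: no interference
        if (LiveRegDefs[R] == SU) continue; // reusing our own def is fine
        if (RegAdded.insert(R).second)      // record each interfering reg once
          LRegs.push_back(R);
      }
    }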
/// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. -bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, - SmallVector<unsigned, 4> &LRegs){ +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (NumLiveRegs == 0) return false; SmallSet<unsigned, 4> RegAdded; // If this node would clobber any "live" register, then it's not ready. + // + // If SU is the currently live definition of the same register that it uses, + // then we are free to schedule it. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->isAssignedRegDep()) + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, RegAdded, LRegs, TRI); } - for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { if (Node->getOpcode() == ISD::INLINEASM) { // Inline asm can clobber physical defs. unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } + return !LRegs.empty(); } +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. +SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { + SmallVector<SUnit*, 4> Interferences; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + if (CurSU) { + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + Interferences[i]->isPending = false; + assert(Interferences[i]->isAvailable && "must still be available"); + AvailableQueue->push(Interferences[i]); + } + return CurSU; + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + SUnit *TrySU = Interferences[i]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
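The fast path of the new PickNodeToScheduleBottomUp pops candidates until one has no live-register interference, parking the rejected ones so they can be pushed back onto the queue afterwards. A generic stand-in for that loop; the node type and hasInterference predicate are placeholders for the example:

    #include <functional>
    #include <queue>
    #include <vector>

    template <typename T>
    T *pickWithoutInterference(std::priority_queue<T*> &Available,
                               const std::function<bool(T*)> &hasInterference) {
      std::vector<T*> Parked;
      T *Picked = nullptr;
      while (!Available.empty()) {
        T *Cand = Available.top();
        Available.pop();
        if (!hasInterference(Cand)) { Picked = Cand; break; }
        Parked.push_back(Cand); // delayed by a live physical register
      }
      // Whatever we parked is still schedulable later; put it back.
      for (T *SU : Parked)
        Available.push(SU);
      return Picked; // null: every candidate interferes (backtrack/copy path)
    }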
+ SUnit *BtSU = NULL; + unsigned LiveCycle = UINT_MAX; + for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { + unsigned Reg = LRegs[j]; + if (LiveRegGens[Reg]->getHeight() < LiveCycle) { + BtSU = LiveRegGens[Reg]; + LiveCycle = BtSU->getHeight(); + } + } + if (!WillCreateCycle(TrySU, BtSU)) { + BacktrackBottomUp(TrySU, BtSU); + + // Force the current node to be scheduled before the node that + // requires the physical reg dep. + if (BtSU->isAvailable) { + BtSU->isAvailable = false; + if (!BtSU->isPending) + AvailableQueue->remove(BtSU); + } + AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + + // If one or more successors has been unscheduled, then the current + // node is no longer avaialable. Schedule a successor that's now + // available instead. + if (!TrySU->isAvailable) { + CurSU = AvailableQueue->pop(); + } + else { + CurSU = TrySU; + TrySU->isPending = false; + Interferences.erase(Interferences.begin()+i); + } + break; + } + } + + if (!CurSU) { + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicate the nodes that produces these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. + SUnit *TrySU = Interferences[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + assert(CurSU && "Unable to resolve live physical register dependencies!"); + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + Interferences[i]->isPending = false; + // May no longer be available due to backtracking. + if (Interferences[i]->isAvailable) { + AvailableQueue->push(Interferences[i]); + } + } + return CurSU; +} /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up /// schedulers. void ScheduleDAGRRList::ListScheduleBottomUp() { - unsigned CurCycle = 0; - // Release any predecessors of the special Exit node. 
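When every candidate interferes, the code first chooses a backtrack target: among the blocking registers, the live-range generator with the smallest height. Reduced to a helper with invented types (the LLVM version does not null-check the gen):

    #include <climits>
    #include <vector>

    struct Unit { unsigned Height; };

    Unit *pickBacktrackTarget(const std::vector<unsigned> &LRegs,
                              const std::vector<Unit*> &LiveRegGens,
                              unsigned &LiveCycle) {
      Unit *BtSU = nullptr;
      LiveCycle = UINT_MAX;
      for (unsigned Reg : LRegs) {
        Unit *Gen = LiveRegGens[Reg];
        if (Gen && Gen->Height < LiveCycle) {
          BtSU = Gen;
          LiveCycle = Gen->Height;
        }
      }
      return BtSU; // unschedule back to this node unless that would form a cycle
    }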
- ReleasePredecessors(&ExitSU, CurCycle); + ReleasePredecessors(&ExitSU); // Add root to Available queue. if (!SUnits.empty()) { @@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. - SmallVector<SUnit*, 4> NotReady; - DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - bool Delayed = false; - LRegsMap.clear(); - SUnit *CurSU = AvailableQueue->pop(); - while (CurSU) { - SmallVector<unsigned, 4> LRegs; - if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) - break; - Delayed = true; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); + DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue->dump(this)); - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - NotReady.push_back(CurSU); - CurSU = AvailableQueue->pop(); - } + // Pick the best node to schedule taking all constraints into + // consideration. + SUnit *SU = PickNodeToScheduleBottomUp(); - // All candidates are delayed due to live physical reg dependencies. - // Try backtracking, code duplication, or inserting cross class copies - // to resolve it. - if (Delayed && !CurSU) { - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - SUnit *TrySU = NotReady[i]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - - // Try unscheduling up to the point where it's safe to schedule - // this node. - unsigned LiveCycle = CurCycle; - for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { - unsigned Reg = LRegs[j]; - unsigned LCycle = LiveRegCycles[Reg]; - LiveCycle = std::min(LiveCycle, LCycle); - } - SUnit *OldSU = Sequence[LiveCycle]; - if (!WillCreateCycle(TrySU, OldSU)) { - BacktrackBottomUp(TrySU, LiveCycle, CurCycle); - // Force the current node to be scheduled before the node that - // requires the physical reg dep. - if (OldSU->isAvailable) { - OldSU->isAvailable = false; - AvailableQueue->remove(OldSU); - } - AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); - // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) - CurSU = AvailableQueue->pop(); - else { - CurSU = TrySU; - TrySU->isPending = false; - NotReady.erase(NotReady.begin()+i); - } - break; - } - } + AdvancePastStalls(SU); - if (!CurSU) { - // Can't backtrack. If it's too expensive to copy the value, then try - // duplicate the nodes that produces these "too expensive to copy" - // values to break the dependency. In case even that doesn't work, - // insert cross class copies. - // If it's not too expensive, i.e. cost != -1, issue copies. - SUnit *TrySU = NotReady[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - assert(LRegs.size() == 1 && "Can't handle this yet!"); - unsigned Reg = LRegs[0]; - SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. 
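Put together, the rewritten ListScheduleBottomUp loop reduces to: pick a node under all constraints, advance the cycle past any stall, schedule it, and when nothing is available jump ahead to the earliest pending ready cycle. A compilable toy version of that control flow, with plain structs standing in for SUnits and no hazard recognizer:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct N { unsigned ReadyCycle; }; // stand-in node: just a ready cycle

    int main() {
      unsigned CurCycle = 0;
      std::vector<N> Available = {{0}, {1}};
      std::vector<N> Pending = {{4}};
      std::vector<N> Sequence;

      while (!Available.empty() || !Pending.empty()) {
        if (Available.empty()) {
          // Nothing can issue: skip ahead to the earliest pending ready cycle,
          // mirroring the MinAvailableCycle jump in the patch.
          unsigned MinReady = Pending.front().ReadyCycle;
          for (const N &P : Pending) MinReady = std::min(MinReady, P.ReadyCycle);
          CurCycle = std::max(CurCycle + 1, MinReady);
          for (unsigned i = 0; i < Pending.size(); /* advanced in body */) {
            if (Pending[i].ReadyCycle <= CurCycle) {
              Available.push_back(Pending[i]);
              Pending[i] = Pending.back();
              Pending.pop_back();
            } else ++i;
          }
          continue;
        }
        // Pick a node (here simply the smallest ready cycle) and make sure the
        // cycle covers its latency before issuing it.
        auto Best = std::min_element(Available.begin(), Available.end(),
                                     [](const N &A, const N &B) {
                                       return A.ReadyCycle < B.ReadyCycle;
                                     });
        N SU = *Best;
        Available.erase(Best);
        CurCycle = std::max(CurCycle, SU.ReadyCycle); // AdvancePastStalls analogue
        Sequence.push_back(SU);
      }
      std::printf("scheduled %zu nodes in %u cycles\n", Sequence.size(), CurCycle);
      return 0;
    }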
- SUnit *NewDef = 0; - if (DestRC) - NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; - if (!NewDef) { - // Issue copies, these can be expensive cross register class copies. - SmallVector<SUnit*, 2> Copies; - InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - NewDef = Copies.back(); - } + ScheduleNodeBottomUp(SU); - DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); - LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - TrySU->isAvailable = false; - CurSU = NewDef; - } - - assert(CurSU && "Unable to resolve live physical register dependencies!"); - } - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - NotReady[i]->isPending = false; - // May no longer be available due to backtracking. - if (NotReady[i]->isAvailable) - AvailableQueue->push(NotReady[i]); + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); } - NotReady.clear(); - - if (CurSU) - ScheduleNodeBottomUp(CurSU, CurCycle); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); } // Reverse the order if it is bottom up. std::reverse(Sequence.begin(), Sequence.end()); - + #ifndef NDEBUG VerifySchedule(isBottomUp); #endif @@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { - unsigned CurCycle = 0; AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. @@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() { SUnits[i].isAvailable = true; } } - + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. 
Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { SUnit *CurSU = AvailableQueue->pop(); - + if (CurSU) - ScheduleNodeTopDown(CurSU, CurCycle); + ScheduleNodeTopDown(CurSU); ++CurCycle; AvailableQueue->setCurCycle(CurCycle); } - + #ifndef NDEBUG VerifySchedule(isBottomUp); #endif @@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() { //===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Implementation +// RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// // // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers // to reduce register pressure. -// +// namespace { - template<class SF> - class RegReductionPriorityQueue; - - /// bu_ls_rr_sort - Priority function for bottom up register pressure - // reduction scheduler. - struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; - bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; +class RegReductionPQBase; + +struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false }; - // td_ls_rr_sort - Priority function for top down register pressure reduction - // scheduler. - struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; - td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; + RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// td_ls_rr_sort - Priority function for top down register pressure reduction +// scheduler. +struct td_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = false, + HasReadyFilter = false }; - // src_ls_rr_sort - Priority function for source order scheduler. - struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; - src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) - : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; + RegReductionPQBase *SPQ; + td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. +struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false }; - // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 
- struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; - hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) - : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} + RegReductionPQBase *SPQ; + src_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + src_ls_rr_sort(const src_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; - bool operator()(const SUnit* left, const SUnit* right) const; +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true }; - // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) - // scheduler. - struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; - ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) - : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} + RegReductionPQBase *SPQ; + hybrid_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true }; -} // end anonymous namespace + + RegReductionPQBase *SPQ; + ilp_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector<SUnit*> Queue; + unsigned CurQueueId; + bool TracksRegPressure; + + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
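RegPressure and RegLimit are indexed by register class ID; pressure counts as high when keeping one more value of a class live would reach the number of allocatable registers in that class. A tiny standalone version of that check, with made-up class IDs and costs:

    #include <vector>

    bool wouldExceedLimit(const std::vector<unsigned> &RegPressure,
                          const std::vector<unsigned> &RegLimit,
                          unsigned RCId, unsigned Cost) {
      return RegPressure[RCId] + Cost >= RegLimit[RCId];
    }

    int main() {
      std::vector<unsigned> Pressure = {3, 1};  // values live per class
      std::vector<unsigned> Limit    = {4, 8};  // allocatable regs per class
      // Keeping one more value of class 0 live would hit its limit here.
      return wouldExceedLimit(Pressure, Limit, /*RCId=*/0, /*Cost=*/1) ? 0 : 1;
    }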
+ std::vector<unsigned> RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), + CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector<SUnit> &sunits); + + void addNode(const SUnit *SU); + + void updateNode(const SUnit *SU); + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU); + + void ScheduledNode(SUnit *SU); + + void UnscheduledNode(SUnit *SU); + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { + static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; + } + + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() { + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker); + V->NodeQueueId = 0; + return V; + } + + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. 
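popFromQueue keeps the queue as a plain vector: scan for the best entry under the picker, swap it to the back, and pop. The scan is O(n) per pop, but it re-evaluates the comparator every time, which matters when node priorities change as scheduling progresses. A standalone version over an int vector, with the same "true means the second argument is better" convention:

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <utility>
    #include <vector>

    template <class Compare>
    int popBest(std::vector<int> &Q, Compare Better) {
      assert(!Q.empty() && "popping from an empty queue");
      auto Best = Q.begin();
      for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
        if (Better(*Best, *I)) // true: *I should be preferred over *Best
          Best = I;
      int V = *Best;
      if (Best != std::prev(Q.end()))
        std::swap(*Best, Q.back());
      Q.pop_back();
      return V;
    }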
+ std::vector<SUnit*> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + if (isBottomUp()) + dbgs() << "Height " << SU->getHeight() << ": "; + else + dbgs() << "Depth " << SU->getDepth() << ": "; + SU->dump(DAG); + } + } +}; + +typedef RegReductionPriorityQueue<bu_ls_rr_sort> +BURegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<td_ls_rr_sort> +TDRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<src_ls_rr_sort> +SrcRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> +HybridBURRPriorityQueue; + +typedef RegReductionPriorityQueue<ilp_ls_rr_sort> +ILPBURRPriorityQueue; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. @@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { if (SethiUllmanNumber == 0) SethiUllmanNumber = 1; - + return SethiUllmanNumber; } -namespace { - template<class SF> - class RegReductionPriorityQueue : public SchedulingPriorityQueue { - std::vector<SUnit*> Queue; - SF Picker; - unsigned CurQueueId; - bool TracksRegPressure; - - protected: - // SUnits - The SUnits for the current graph. - std::vector<SUnit> *SUnits; - - MachineFunction &MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const TargetLowering *TLI; - ScheduleDAGRRList *scheduleDAG; - - // SethiUllmanNumbers - The SethiUllman number for each node. - std::vector<unsigned> SethiUllmanNumbers; - - /// RegPressure - Tracking current reg pressure per register class. - /// - std::vector<unsigned> RegPressure; - - /// RegLimit - Tracking the number of allocatable registers per register - /// class. - std::vector<unsigned> RegLimit; - - public: - RegReductionPriorityQueue(MachineFunction &mf, - bool tracksrp, - const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, - const TargetLowering *tli) - : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { - if (TracksRegPressure) { - unsigned NumRC = TRI->getNumRegClasses(); - RegLimit.resize(NumRC); - RegPressure.resize(NumRC); - std::fill(RegLimit.begin(), RegLimit.end(), 0); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); - } - } - - void initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. - CalculateSethiUllmanNumbers(); - } - - void addNode(const SUnit *SU) { - unsigned SUSize = SethiUllmanNumbers.size(); - if (SUnits->size() > SUSize) - SethiUllmanNumbers.resize(SUSize*2, 0); - CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); - } - - void updateNode(const SUnit *SU) { - SethiUllmanNumbers[SU->NodeNum] = 0; - CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); - } +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. 
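CalcNodeSethiUllmanNumber is a DAG adaptation of the classic Sethi-Ullman labeling, which estimates how many registers a subtree needs to evaluate without spilling. The tree form below is only the textbook version of the idea, not the code in this patch:

    #include <algorithm>
    #include <cstdio>
    #include <memory>

    struct Expr {
      std::unique_ptr<Expr> L, R; // both null for leaves
    };

    unsigned sethiUllman(const Expr *E) {
      if (!E->L && !E->R)
        return 1; // a leaf needs one register to hold its value
      unsigned LN = E->L ? sethiUllman(E->L.get()) : 0;
      unsigned RN = E->R ? sethiUllman(E->R.get()) : 0;
      // If both children need the same amount, one result must stay live while
      // the other side is evaluated, so one extra register is required.
      return LN == RN ? LN + 1 : std::max(LN, RN);
    }

    int main() {
      auto leaf = [] { return std::make_unique<Expr>(); };
      auto node = [](std::unique_ptr<Expr> L, std::unique_ptr<Expr> R) {
        auto N = std::make_unique<Expr>();
        N->L = std::move(L);
        N->R = std::move(R);
        return N;
      };
      // (a + b) * (c + d)
      auto Root = node(node(leaf(), leaf()), node(leaf(), leaf()));
      std::printf("registers needed: %u\n", sethiUllman(Root.get())); // prints 3
      return 0;
    }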
+void RegReductionPQBase::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); - void releaseState() { - SUnits = 0; - SethiUllmanNumbers.clear(); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - } + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} - unsigned getNodePriority(const SUnit *SU) const { - assert(SU->NodeNum < SethiUllmanNumbers.size()); - unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; - if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) - // CopyToReg should be close to its uses to facilitate coalescing and - // avoid spilling. - return 0; - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::INSERT_SUBREG) - // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be - // close to their uses to facilitate coalescing. - return 0; - if (SU->NumSuccs == 0 && SU->NumPreds != 0) - // If SU does not have a register use, i.e. it doesn't produce a value - // that would be consumed (e.g. store), then it terminates a chain of - // computation. Give it a large SethiUllman number so it will be - // scheduled right before its predecessors that it doesn't lengthen - // their live ranges. - return 0xffff; - if (SU->NumPreds == 0 && SU->NumSuccs != 0) - // If SU does not have a register def, schedule it close to its uses - // because it does not lengthen any live ranges. - return 0; - return SethiUllmanNumbers[SU->NodeNum]; - } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); +} - unsigned getNodeOrdering(const SUnit *SU) const { - return scheduleDAG->DAG->GetOrdering(SU->getNode()); - } +void RegReductionPQBase::addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} - bool empty() const { return Queue.empty(); } - - void push(SUnit *U) { - assert(!U->NodeQueueId && "Node in the queue already"); - U->NodeQueueId = ++CurQueueId; - Queue.push_back(U); - } +void RegReductionPQBase::updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} - SUnit *pop() { - if (empty()) return NULL; - std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), - E = Queue.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Queue.end())) - std::swap(*Best, Queue.back()); - Queue.pop_back(); - V->NodeQueueId = 0; - return V; - } +// Lower priority means schedule further down. For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+ return 0; + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. it doesn't produce a value + // that would be consumed (e.g. store), then it terminates a chain of + // computation. Give it a large SethiUllman number so it will be + // scheduled right before its predecessors that it doesn't lengthen + // their live ranges. + return 0xffff; + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return 0; + return SethiUllmanNumbers[SU->NodeNum]; +} - void remove(SUnit *SU) { - assert(!Queue.empty() && "Queue is empty!"); - assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), - SU); - if (I != prior(Queue.end())) - std::swap(*I, Queue.back()); - Queue.pop_back(); - SU->NodeQueueId = 0; - } +//===----------------------------------------------------------------------===// +// Register Pressure Tracking +//===----------------------------------------------------------------------===// - bool HighRegPressure(const SUnit *SU) const { - if (!TLI) - return false; +void RegReductionPQBase::dumpRegPressure() const { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] + << '\n'); + } +} - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] >= RegLimit[RCId]) - return true; // Reg pressure already high. 
- unsigned Cost = TLI->getRepRegClassCostFor(VT); - if (!PN->hasAnyUseOfValue(i)) - continue; - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - } +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; - return false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; } + } + return false; +} - void ScheduledNode(SUnit *SU) { - if (!TracksRegPressure) - return; - - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) { - if (N->getOpcode() != ISD::CopyToReg) - return; - } else { - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; - } +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { + const SDNode *N = SU->getNode(); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - } + if (!N->isMachineOpcode() || !SU->NumSuccs) + return false; - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. 
- if (SU->NumSuccs && N->isMachineOpcode()) { - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - } - } + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; + } + return false; +} + +void RegReductionPQBase::ScheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; - dumpRegPressure(); + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + // FIXME: The ScheduleDAG currently loses information about which of a + // node's values is consumed by each dependence. Consequently, if the node + // defines multiple register classes, we don't know which to pressurize + // here. Instead the following loop consumes the register defs in an + // arbitrary order. At least it handles the common case of clustered loads + // to the same class. For precise liveness, each SDep needs to indicate the + // result number. But that tightly couples the ScheduleDAG with the + // SelectionDAG making updates tricky. A simpler hack would be to attach a + // value type or register class to SDep. + // + // The most important aspect of register tracking is balancing the increase + // here with the reduction further below. Note that this SU may use multiple + // defs in PredSU. The can't be determined here, but we've already + // compensated by reducing NumRegDefsLeft in PredSU during + // ScheduleDAGSDNodes::AddSchedEdges. + --PredSU->NumRegDefsLeft; + unsigned SkipRegDefs = PredSU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs) + continue; + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + break; } + } - void UnscheduledNode(SUnit *SU) { - if (!TracksRegPressure) - return; - - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) { - if (N->getOpcode() != ISD::CopyToReg) - return; - } else { - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; - } + // We should have this assert, but there may be dead SDNodes that never + // materialize as SUnits, so they don't appear to generate liveness. 
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); + int SkipRegDefs = (int)SU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs > 0) + continue; + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + // Register pressure tracking is imprecise. This can happen. But we try + // hard not to let it happen because it likely results in poor scheduling. + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + dumpRegPressure(); +} - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - } - } +void RegReductionPQBase::UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. - if (SU->NumSuccs && N->isMachineOpcode()) { - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) - continue; - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumSuccsLeft counts all deps. 
Don't compare it with NumSuccs which only + // counts data deps. + if (PredSU->NumSuccsLeft != PredSU->Succs.size()) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } - - dumpRegPressure(); + continue; } - - void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { - scheduleDAG = scheduleDag; + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; } - - void dumpRegPressure() const { - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) { - const TargetRegisterClass *RC = *I; - unsigned Id = RC->getID(); - unsigned RP = RegPressure[Id]; - if (!RP) continue; - DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] - << '\n'); - } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); } + } - protected: - bool canClobber(const SUnit *SU, const SUnit *Op); - void AddPseudoTwoAddrDeps(); - void PrescheduleNodesWithMultipleUses(); - void CalculateSethiUllmanNumbers(); - }; - - typedef RegReductionPriorityQueue<bu_ls_rr_sort> - BURegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<td_ls_rr_sort> - TDRegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<src_ls_rr_sort> - SrcRegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> - HybridBURRPriorityQueue; + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } - typedef RegReductionPriorityQueue<ilp_ls_rr_sort> - ILPBURRPriorityQueue; + dumpRegPressure(); } +//===----------------------------------------------------------------------===// +// Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + /// closestSucc - Returns the scheduled cycle of the successor which is /// closest to the current cycle. 
static unsigned closestSucc(const SUnit *SU) { @@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -template <typename RRSort> -static bool BURRSort(const SUnit *left, const SUnit *right, - const RegReductionPriorityQueue<RRSort> *SPQ) { +/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// CopyToReg to a virtual register. This SU def is probably a liveout and +/// it has no other use. It should be scheduled closer to the terminator. +static bool hasOnlyLiveOutUses(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *SuccSU = I->getSUnit(); + if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { + unsigned Reg = + cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// UnitsSharePred - Return true if the two scheduling units share a common +/// data predecessor. +static bool UnitsSharePred(const SUnit *left, const SUnit *right) { + SmallSet<const SUnit*, 4> Preds; + for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + Preds.insert(I->getSUnit()); + } + for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (Preds.count(I->getSUnit())) + return true; + } + return false; +} + +// Check for either a dependence (latency) or resource (hazard) stall. +// +// Note: The ScheduleHazardRecognizer interface requires a non-const SU. +static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { + if ((int)SPQ->getCurCycle() < Height) return true; + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return true; + return false; +} + +// Return -1 if left has higher priority, 1 if right has higher priority. +// Return 0 if latency-based priority is equivalent. +static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, + RegReductionPQBase *SPQ) { + // If the two nodes share an operand and one of them has a single + // use that is a live out copy, favor the one that is live out. Otherwise + // it will be difficult to eliminate the copy if the instruction is a + // loop induction variable update. e.g. + // BB: + // sub r1, r3, #1 + // str r0, [r2, r3] + // mov r3, r1 + // cmp + // bne BB + bool SharePred = UnitsSharePred(left, right); + // FIXME: Only adjust if BB is a loop back edge. + // FIXME: What's the cost of a copy? + int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; + int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; + int LHeight = (int)left->getHeight() - LBonus; + int RHeight = (int)right->getHeight() - RBonus; + + bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + BUHasStall(left, LHeight, SPQ); + bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + BUHasStall(right, RHeight, SPQ); + + // If scheduling one of the node will cause a pipeline stall, delay it. + // If scheduling either one of the node will cause a pipeline stall, sort + // them according to their height. + if (LStall) { + if (!RStall) + return 1; + if (LHeight != RHeight) + return LHeight > RHeight ? 
1 : -1; + } else if (RStall) + return -1; + + // If either node is scheduling for latency, sort them by height/depth + // and latency. + if (!checkPref || (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency)) { + if (DisableSchedCycles) { + if (LHeight != RHeight) + return LHeight > RHeight ? 1 : -1; + } + else { + // If neither instruction stalls (!LStall && !RStall) then + // it's height is already covered so only its depth matters. We also reach + // this if both stall but have the same height. + unsigned LDepth = left->getDepth(); + unsigned RDepth = right->getDepth(); + if (LDepth != RDepth) { + DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); + return LDepth < RDepth ? 1 : -1; + } + } + if (left->Latency != right->Latency) + return left->Latency > right->Latency ? 1 : -1; + } + return 0; +} + +static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); if (LPriority != RPriority) @@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right, if (LScratch != RScratch) return LScratch > RScratch; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); + if (!DisableSchedCycles) { + int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + else { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); - assert(left->NodeQueueId && right->NodeQueueId && + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + } + + assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up -bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { +bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. -bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { +bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { return BURRSort(left, right, SPQ); } -bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + static const unsigned ReadyDelay = 3; + + if (SPQ->MayReduceRegPressure(SU)) return true; + + if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +// Return true if right should be scheduled with higher priority than left. 
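BUCompareLatency folds the stall, height, depth and latency checks into a single three-way result so BURRSort can fall through to its other tie-breakers when the comparison is neutral. The standalone sketch below shows only that shape; the fields are invented and the preference directions are chosen for the example rather than reproducing the patch's exact heuristics:

    struct Cand {
      bool Stalled;    // would stall if issued at the current cycle
      unsigned Height; // distance to the end of the region
      unsigned Depth;  // distance from the start of the region
      unsigned Latency;
    };

    // Returns -1 if Left should go first, 1 for Right, 0 if equivalent so the
    // caller can apply its remaining (e.g. register pressure) tie-breakers.
    int compareLatency(const Cand &Left, const Cand &Right) {
      if (Left.Stalled != Right.Stalled)
        return Left.Stalled ? 1 : -1;     // prefer the candidate that won't stall
      if (Left.Height != Right.Height)
        return Left.Height > Right.Height ? -1 : 1;
      if (Left.Depth != Right.Depth)
        return Left.Depth < Right.Depth ? -1 : 1;
      if (Left.Latency != Right.Latency)
        return Left.Latency > Right.Latency ? -1 : 1;
      return 0;
    }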
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. - if (LHigh && !RHigh) + if (LHigh && !RHigh) { + DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); return true; - else if (!LHigh && RHigh) + } + else if (!LHigh && RHigh) { + DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); return false; + } else if (!LHigh && !RHigh) { - // Low register pressure situation, schedule for latency if possible. - bool LStall = left->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < left->getHeight(); - bool RStall = right->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < right->getHeight(); - // If scheduling one of the node will cause a pipeline stall, delay it. - // If scheduling either one of the node will cause a pipeline stall, sort - // them according to their height. - // If neither will cause a pipeline stall, try to reduce register pressure. - if (LStall) { - if (!RStall) - return true; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - } else if (RStall) - return false; - - // If either node is scheduling for latency, sort them by height and latency - // first. - if (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency) { - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - if (left->Latency != right->Latency) - return left->Latency > right->Latency; - } + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; } - return BURRSort(left, right, SPQ); } -bool ilp_ls_rr_sort::operator()(const SUnit *left, - const SUnit *right) const { +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return SU->getHeight() <= CurCycle; +} + +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. 
If register pressure is high, schedule for @@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left, return BURRSort(left, right, SPQ); } -template<class SF> -bool -RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { +//===----------------------------------------------------------------------===// +// Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); @@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { return false; } -/// hasCopyToRegUse - Return true if SU has a value successor that is a -/// CopyToReg node. -static bool hasCopyToRegUse(const SUnit *SU) { - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *SuccSU = I->getSUnit(); - if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) - return true; - } - return false; -} - /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's /// physical register defs. static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, @@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getFlaggedNode()) { + SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; const unsigned *SUImpDefs = @@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, return false; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) + if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) continue; @@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// after N, which shortens the U->N live range, reducing /// register pressure. /// -template<class SF> -void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; @@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { if (PredSU->NumSuccs == 1) continue; // Avoid prescheduling to copies from virtual registers, which don't behave - // like other nodes from the perspective of scheduling // heuristics. + // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU->getNode()) if (N->getOpcode() == ISD::CopyFromReg && TargetRegisterInfo::isVirtualRegister @@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. 
-template<class SF> -void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { +void RegReductionPQBase::AddPseudoTwoAddrDeps() { for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; if (!SU->isTwoAddress) continue; SDNode *Node = SU->getNode(); - if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) + if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) continue; + bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); unsigned NumRes = TID.getNumDefs(); @@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; if ((!canClobber(SuccSU, DUSU) || - (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" @@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { } } -/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all -/// scheduling units. -template<class SF> -void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { - SethiUllmanNumbers.assign(SUnits->size(), 0); - - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); -} - /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled /// predecessors of the successors of the SUnit SU. Stop when the provided /// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, unsigned Limit) { unsigned Sum = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (left->NumSuccsLeft != right->NumSuccsLeft) return left->NumSuccsLeft > right->NumSuccsLeft; - assert(left->NodeQueueId && right->NodeQueueId && + assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); return (left->NodeQueueId > right->NodeQueueId); } @@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { //===----------------------------------------------------------------------===// llvm::ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new 
ScheduleDAGRRList(*IS->MF, false, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const TargetLowering *TLI = &IS->getTargetLowering(); - + HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const TargetLowering *TLI = &IS->getTargetLowering(); - + ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f1bf82ab145a..477c1ffe65d3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -34,8 +34,8 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf) { -} + : ScheduleDAG(mf), + InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isCall = Old->isCall; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; @@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { /// a specified operand is a physical register dependency. If so, returns the /// register and the cost of copying the register. 
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, + const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) @@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } } -static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, - SelectionDAG *DAG) { +static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { SmallVector<EVT, 4> VTs; - SDNode *FlagDestNode = Flag.getNode(); + SDNode *GlueDestNode = Glue.getNode(); - // Don't add a flag from a node to itself. - if (FlagDestNode == N) return; + // Don't add glue from a node to itself. + if (GlueDestNode == N) return; - // Don't add a flag to something which already has a flag. - if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + // Don't add glue to something which already has glue. + if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return; for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) VTs.push_back(N->getValueType(I)); - if (AddFlag) - VTs.push_back(MVT::Flag); + if (AddGlue) + VTs.push_back(MVT::Glue); SmallVector<SDValue, 4> Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); - if (FlagDestNode) - Ops.push_back(Flag); + if (GlueDestNode) + Ops.push_back(Glue); SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); MachineSDNode::mmo_iterator Begin = 0, End = 0; @@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, MN->setMemRefs(Begin, End); } -/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. +/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. /// This function finds loads of the same base and different offsets. If the -/// offsets are not far apart (target specific), it add MVT::Flag inputs and +/// offsets are not far apart (target specific), it add MVT::Glue inputs and /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { @@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (NumLoads == 0) return; - // Cluster loads by adding MVT::Flag outputs and inputs. This also + // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - AddFlags(Lead, SDValue(0, 0), true, DAG); + AddGlue(Lead, SDValue(0, 0), true, DAG); - SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); + SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { - bool OutFlag = I < E - 1; + bool OutGlue = I < E - 1; SDNode *Load = Loads[I]; - AddFlags(Load, InFlag, OutFlag, DAG); + AddGlue(Load, InGlue, OutGlue, DAG); - if (OutFlag) - InFlag = SDValue(Load, Load->getNumValues() - 1); + if (OutGlue) + InGlue = SDValue(Load, Load->getNumValues() - 1); ++LoadsClustered; } @@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // FIXME: Multiply by 2 because we may clone nodes during scheduling. // This is a temporary workaround. SUnits.reserve(NumNodes * 2); - + // Add all nodes in depth first order. 
SmallVector<SDNode*, 64> Worklist; SmallPtrSet<SDNode*, 64> Visited; Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); - + while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); - + // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) if (Visited.insert(NI->getOperand(i).getNode())) Worklist.push_back(NI->getOperand(i).getNode()); - + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; - + // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - + SUnit *NodeSUnit = NewSUnit(NI); - - // See if anything is flagged to this node, if so, add them to flagged - // nodes. Nodes can have at most one flag input and one flag output. Flags - // are required to be the last operand and result of a node. - - // Scan up to find flagged preds. + + // See if anything is glued to this node, if so, add them to glued + // nodes. Nodes can have at most one glue input and one glue output. Glue + // is required to be the last operand and result of a node. + + // Scan up to find glued preds. SDNode *N = NI; while (N->getNumOperands() && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { N = N->getOperand(N->getNumOperands()-1).getNode(); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; } - - // Scan down to find any flagged succs. + + // Scan down to find any glued succs. N = NI; - while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { - SDValue FlagVal(N, N->getNumValues()-1); - - // There are either zero or one users of the Flag result. - bool HasFlagUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { + SDValue GlueVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Glue result. + bool HasGlueUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) - if (FlagVal.isOperandOf(*UI)) { - HasFlagUse = true; + if (GlueVal.isOperandOf(*UI)) { + HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; break; } - if (!HasFlagUse) break; + if (!HasGlueUse) break; } - - // If there are flag operands involved, N is now the bottom-most node - // of the sequence of nodes that are flagged together. + + // If there are glue operands involved, N is now the bottom-most node + // of the sequence of nodes that are glued together. // Update the SUnit. NodeSUnit->setNode(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + // Assign the Latency field of NodeSUnit using target-provided information. 
ComputeLatency(NodeSUnit); } @@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); - + if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); @@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (TID.isCommutable()) SU->isCommutable = true; } - + // Find all predecessors and successors of the group. - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; @@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } - + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. @@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); - assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; @@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - SU->addPred(dep); + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + // Multiple register uses are combined in the same SUnit. For example, + // we could have a set of glued nodes with all their defs consumed by + // another set of glued nodes. Register pressure tracking sees this as + // a single use, so to keep pressure balanced we reduce the defs. + --OpSU->NumRegDefsLeft; + } } } } @@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents -/// flagged together nodes with a single SUnit. +/// glued together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); @@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { AddSchedEdges(); } +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + if (!Node->isMachineOpcode()) { + if (Node->getOpcode() == ISD::CopyFromReg) + NodeNumDefs = 1; + else + NodeNumDefs = 0; + return; + } + unsigned POpc = Node->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) { + // No register need be allocated for this. + NodeNumDefs = 0; + return; + } + unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); + // Some instructions define regs that are not represented in the selection DAG + // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. + NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); + DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. 
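[Editor's note - not part of the commit.] The RegDefIter added in this hunk uses a small "construct, then IsValid()/Advance()" iteration style instead of STL iterator pairs; InitNumRegDefsLeft below consumes it as "for (RegDefIter I(SU, this); I.IsValid(); I.Advance())". A toy, LLVM-free sketch of the same pattern follows; the class name and the skip-zero filter are invented purely for illustration (RegDefIter itself skips defs with no uses):

#include <cassert>
#include <vector>

// Minimal IsValid()/Advance() iterator that skips invalid (here: zero) entries.
class NonZeroIter {
  const std::vector<int> &Vals;
  size_t Idx;
public:
  explicit NonZeroIter(const std::vector<int> &V) : Vals(V), Idx(0) {
    while (Idx < Vals.size() && Vals[Idx] == 0)
      ++Idx; // find the first valid value, like the constructor's Advance()
  }
  bool IsValid() const { return Idx < Vals.size(); }
  int GetValue() const { assert(IsValid() && "bad iterator"); return Vals[Idx]; }
  void Advance() {
    do ++Idx; while (Idx < Vals.size() && Vals[Idx] == 0);
  }
};

int main() {
  std::vector<int> V = {0, 3, 0, 7, 0};
  int Count = 0, Sum = 0;
  for (NonZeroIter I(V); I.IsValid(); I.Advance()) {
    ++Count;
    Sum += I.GetValue();
  }
  assert(Count == 2 && Sum == 10);
  return 0;
}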
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, + const ScheduleDAGSDNodes *SD) + : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + InitNodeNumDefs(); + Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { + for (;Node;) { // Visit all glued nodes. + for (;DefIdx < NodeNumDefs; ++DefIdx) { + if (!Node->hasAnyUseOfValue(DefIdx)) + continue; + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { + // Propagate the incoming (full-register) type. I doubt it's needed. + ValueType = Node->getOperand(0).getValueType(); + } + else { + ValueType = Node->getValueType(DefIdx); + } + ++DefIdx; + return; // Found a normal regdef. + } + Node = Node->getGluedNode(); + if (Node == NULL) { + return; // No values left to visit. + } + InitNodeNumDefs(); + } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { + assert(SU->NumRegDefsLeft == 0 && "expect a new node"); + for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { + assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); + ++SU->NumRegDefsLeft; + } +} + void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { @@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { return; } - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) { + if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; return; } - + // Compute the latency for the node. We use the sum of the latencies for - // all nodes flagged together into this SUnit. + // all nodes glued together into this SUnit. SU->Latency = 0; - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - if (N->isMachineOpcode()) { - SU->Latency += InstrItins. - getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); - } + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); } void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, @@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, if (ForceUnitLatencies()) return; - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) - return; - if (dep.getKind() != SDep::Data) return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); - if (DefIdx >= II.getNumDefs()) - return; - int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); - if (DefCycle < 0) - return; - int UseCycle = 1; - if (Use->isMachineOpcode()) { - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); - } - if (UseCycle >= 0) { - int Latency = DefCycle - UseCycle + 1; - if (Latency >= 0) - dep.setLatency(Latency); - } + if (Use->isMachineOpcode()) + // Adjust the use operand index by num of defs. + OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); + int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && + !BB->succ_empty()) { + unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + // This copy is a liveout value. 
It is likely coalesced, so reduce the + // latency so not to penalize the def. + // FIXME: need target specific adjustment here? + Latency = (Latency > 1) ? Latency - 1 : 1; } + if (Latency >= 0) + dep.setLatency(Latency); } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { @@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { SU->getNode()->dump(DAG); dbgs() << "\n"; - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { dbgs() << " "; - FlaggedNodes.back()->dump(DAG); + GluedNodes.back()->dump(DAG); dbgs() << "\n"; - FlaggedNodes.pop_back(); + GluedNodes.pop_back(); } } @@ -507,37 +573,25 @@ namespace { }; } -// ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule uses to determine how to insert dbg_value -// instructions in the right order. -static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, +/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, + InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - SmallSet<unsigned, 8> &Seen) { - unsigned Order = DAG->GetOrdering(N); - if (!Order || !Seen.insert(Order)) - return; - - MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { - // Did not insert any instruction. - Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); - return; - } - - Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + DenseMap<SDValue, unsigned> &VRBaseMap, + unsigned Order) { if (!N->getHasDebugValue()) return; + // Opportunistically insert immediate dbg_value uses, i.e. those with source // order number right after the N. + MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); for (unsigned i = 0, e = DVs.size(); i != e; ++i) { if (DVs[i]->isInvalidated()) continue; unsigned DVOrder = DVs[i]->getOrder(); - if (DVOrder == ++Order) { + if (!Order || DVOrder == ++Order) { MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); if (DbgMI) { Orders.push_back(std::make_pair(DVOrder, DbgMI)); @@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, } } +// ProcessSourceNode - Process nodes with source order numbers. These are added +// to a vector which EmitSchedule uses to determine how to insert dbg_value +// instructions in the right order. +static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, + InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, + SmallSet<unsigned, 8> &Seen) { + unsigned Order = DAG->GetOrdering(N); + if (!Order || !Seen.insert(Order)) { + // Process any valid SDDbgValues even if node does not have any order + // assigned. + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); + return; + } + + MachineBasicBlock *BB = Emitter.getBlock(); + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + // Did not insert any instruction. 
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + return; + } + + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); +} + /// EmitSchedule - Emit the machine code in scheduled order. MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { @@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // For pre-regalloc scheduling, create instructions corresponding to the - // SDNode and any flagged SDNodes and append them to the block. + // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. EmitPhysRegCopy(SU, CopyVRBaseMap); continue; } - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode()->getFlaggedNode(); N; - N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { - SDNode *N = FlaggedNodes.back(); - Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; + N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + SDNode *N = GluedNodes.back(); + Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); - FlaggedNodes.pop_back(); + GluedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); @@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; -#ifndef NDEBUG - unsigned LastDIOrder = 0; -#endif for (; DI != DE && (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { -#ifndef NDEBUG - assert((*DI)->getOrder() >= LastDIOrder && - "SDDbgValue nodes must be in source order!"); - LastDIOrder = (*DI)->getOrder(); -#endif if ((*DI)->isInvalidated()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 842fc8c72703..cc7310e4ca42 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -20,13 +20,13 @@ namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. - /// + /// /// Edges between SUnits are initially based on edges in the SelectionDAG, /// and additional edges can be added by the schedulers as heuristics. /// SDNodes such as Constants, Registers, and a few others that are not /// interesting to schedulers are not allocated SUnits. /// - /// SDNodes with MVT::Flag operands are grouped along with the flagged + /// SDNodes with MVT::Glue operands are grouped along with the flagged /// nodes into a single SUnit so that they are scheduled together. /// /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output @@ -36,6 +36,7 @@ namespace llvm { class ScheduleDAGSDNodes : public ScheduleDAG { public: SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; explicit ScheduleDAGSDNodes(MachineFunction &mf); @@ -72,13 +73,17 @@ namespace llvm { /// predecessors / successors info nor the temporary scheduling states. /// SUnit *Clone(SUnit *N); - + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. 
This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); @@ -105,6 +110,30 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + EVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != NULL; } + + EVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ad06ebda5b00..2fb2f2d8aa1e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" @@ -44,7 +43,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, /// BUILD_VECTOR where all of the elements are ~0 or undef. bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BIT_CONVERT) + if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { /// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BIT_CONVERT) + if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) { if (N->getOperand(0).getOpcode() == ISD::UNDEF) return false; unsigned NumElems = N->getNumOperands(); + if (NumElems == 1) + return false; for (unsigned i = 1; i < NumElems; ++i) { SDValue V = N->getOperand(i); if (V.getOpcode() != ISD::UNDEF) @@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, /// doNotCSE - Return true if CSE should not be performed for this node. static bool doNotCSE(SDNode *N) { - if (N->getValueType(0) == MVT::Flag) + if (N->getValueType(0) == MVT::Glue) return true; // Never CSE anything that produces a flag. 
switch (N->getOpcode()) { @@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) { // Check that remaining values produced are not flags. for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) - if (N->getValueType(i) == MVT::Flag) + if (N->getValueType(i) == MVT::Glue) return true; // Never CSE anything that produces a flag. return false; @@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) { bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { bool Erased = false; switch (N->getOpcode()) { - case ISD::EntryToken: - llvm_unreachable("EntryToken should not be in CSEMaps!"); - return false; case ISD::HANDLENODE: return false; // noop. case ISD::CONDCODE: assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && @@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { } default: // Remove it from the CSE Map. + assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); Erased = CSEMap.RemoveNode(N); break; } @@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { // Verify that the node was actually in one of the CSE maps, unless it has a // flag result (which cannot be CSE'd) or is one of the special cases that are // not subject to CSE. - if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); dbgs() << "\n"; @@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, return Node; } -/// VerifyNode - Sanity check the given node. Aborts if it is invalid. -void SelectionDAG::VerifyNode(SDNode *N) { +#ifndef NDEBUG +/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. +static void VerifyNodeCommon(SDNode *N) { switch (N->getOpcode()) { default: break; @@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) { } } +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { + // The SDNode allocators cannot be used to allocate nodes with fields that are + // not present in an SDNode! + assert(!isa<MemSDNode>(N) && "Bad MemSDNode!"); + assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!"); + assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!"); + assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!"); + assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!"); + assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!"); + assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!"); + assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!"); + assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!"); + assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!"); + assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!"); + assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!"); + assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!"); + assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!"); + assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!"); + assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!"); + assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!"); + assert(!isa<VTSDNode>(N) && "Bad VTSDNode!"); + assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!"); + + VerifyNodeCommon(N); +} + +/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is +/// invalid. 
+static void VerifyMachineNode(SDNode *N) { + // The MachineNode allocators cannot be used to allocate nodes with fields + // that are not present in a MachineNode! + // Currently there are no such nodes. + + VerifyNodeCommon(N); +} +#endif // NDEBUG + /// getEVTAlignment - Compute the default alignment value for the /// given type. /// @@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); ID.AddPointer(MD); - + void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clear(); + KnownOne.clearAllBits(); unsigned TrailZ = KnownZero.countTrailingOnes() + KnownZero2.countTrailingOnes(); unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + @@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, AllOnes, KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); - KnownOne2.clear(); - KnownZero2.clear(); + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); @@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // If the sign extended bits are demanded, we know that the sign // bit is demanded. 
- InSignBit.zext(BitWidth); + InSignBit = InSignBit.zext(BitWidth); if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; @@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); - KnownOne.clear(); + KnownOne.clearAllBits(); return; } case ISD::LOAD: { @@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask; - InMask.trunc(InBits); - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + APInt InMask = Mask.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; return; } @@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask; - InMask.trunc(InBits); + APInt InMask = Mask.trunc(InBits); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. Temporarily set this bit in the mask for our callee. if (NewBits.getBoolValue()) InMask |= InSignBit; - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. @@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // If the sign bit wasn't actually demanded by our caller, we don't // want it set in the KnownZero and KnownOne result values. Reset the // mask and reapply it to the result values. - InMask = Mask; - InMask.trunc(InBits); + InMask = Mask.trunc(InBits); KnownZero &= InMask; KnownOne &= InMask; - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); // If the sign bit is known zero or one, the top bits match. 
if (SignBitKnownZero) @@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask; - InMask.trunc(InBits); - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + APInt InMask = Mask.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); return; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask; - InMask.zext(InBits); - KnownZero.zext(InBits); - KnownOne.zext(InBits); + APInt InMask = Mask.zext(InBits); + KnownZero = KnownZero.zext(InBits); + KnownOne = KnownOne.zext(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); break; } case ISD::AssertZext: { @@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } } // fall through - case ISD::ADD: { + case ISD::ADD: + case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. @@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + if (Op.getOpcode() == ISD::ADD) { + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + + // With ADDE, a carry bit may be added in, so we can only use this + // information if we know (at least) that the low two bits are clear. We + // then return to the caller that the low bit is unknown but that other bits + // are known zero. + if (KnownZeroOut >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); return; } case ISD::SREM: @@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; return; } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + if (unsigned Align = InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); + return; + } + break; + default: // Allow the target to implement this method for its nodes. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } +/// isBaseWithConstantOffset - Return true if the specified operand is an +/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an +/// ISD::OR with a ConstantSDNode that is guaranteed to have the same +/// semantics as an ADD. This handles the equivalence: +/// X|Cst == X+Cst iff X&Cst = 0. 
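[Editor's note - not part of the commit.] A concrete instance of the identity the comment above relies on: when the constant only occupies bit positions known to be zero in the base (the MaskedValueIsZero check in the function that follows), OR and ADD produce the same value, so an aligned base OR'd with a small offset can be treated as base-plus-constant-offset addressing. A standalone check; the specific values are illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000; // e.g. an 8-byte-aligned address: low 3 bits are 0
  uint64_t Cst  = 0x4;    // offset that lives entirely in those known-zero bits
  assert((Base & Cst) == 0);          // the MaskedValueIsZero precondition
  assert((Base | Cst) == Base + Cst); // X|Cst == X+Cst when X&Cst == 0
  return 0;
}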
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { + if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || + !isa<ConstantSDNode>(Op.getOperand(1))) + return false; + + if (Op.getOpcode() == ISD::OR && + !MaskedValueIsZero(Op.getOperand(0), + cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) + return false; + + return true; +} + + bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. if (NoNaNsFPMath) @@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - const uint64_t zero[] = {0, 0}; // No compile time operations on ppcf128. if (VT == MVT::ppcf128) break; - APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); return getConstantFP(apf, VT); } - case ISD::BIT_CONVERT: + case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(Val.bitsToFloat(), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, APInt api(VT.getSizeInBits(), 2, x); return getConstant(api, VT); } - case ISD::BIT_CONVERT: + case ISD::BITCAST: if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) @@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return Operand.getNode()->getOperand(0); } break; - case ISD::BIT_CONVERT: + case ISD::BITCAST: // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() - && "Cannot BIT_CONVERT between types of different sizes!"); + && "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. 
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) - return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; @@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { // Don't CSE flag producing nodes + if (VT != MVT::Glue) { // Don't CSE flag producing nodes FoldingSetNodeID ID; SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); @@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Shift operators return type must be the same as their first arg"); assert(VT.isInteger() && N2.getValueType().isInteger() && "Shifts only work on integers"); + // Verify that the shift amount VT is bit enough to hold valid shift + // amounts. This catches things like trying to shift an i1024 value by an + // i8, which is easy to fall into in generic code that uses + // TLI.getShiftAmount(). + assert(N2.getValueType().getSizeInBits() >= + Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to // handle them. Since we know the size of the shift has to be less than the @@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return getConstant(ShiftedVal.trunc(ElementSize), VT); } break; - case ISD::EXTRACT_SUBVECTOR: - if (N1.getValueType() == VT) // Trivial extraction. - return N1; + case ISD::EXTRACT_SUBVECTOR: { + SDValue Index = N2; + if (VT.isSimple() && N1.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + "Extract subvector VTs must be a vectors!"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Extract subvector must be from larger vector to smaller vector!"); + + if (isa<ConstantSDNode>(Index.getNode())) { + assert((VT.getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= N1.getValueType().getVectorNumElements()) + && "Extract subvector overflow!"); + } + + // Trivial extraction. + if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + return N1; + } break; } + } if (N1C) { if (N2C) { @@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Memoize this node if possible. 
SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); @@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; - case ISD::BIT_CONVERT: + case ISD::INSERT_SUBVECTOR: { + SDValue Index = N3; + if (VT.isSimple() && N1.getValueType().isSimple() + && N2.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + N2.getValueType().isVector() && + "Insert subvector VTs must be a vectors"); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Insert subvector must be from smaller vector to larger vector!"); + if (isa<ConstantSDNode>(Index.getNode())) { + assert((N2.getValueType().getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= VT.getVectorNumElements()) + && "Insert subvector overflow!"); + } + + // Trivial insertion. + if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + return N2; + } + break; + } + case ISD::BITCAST: // Fold bit_convert nodes from a type to themselves. if (N1.getValueType() == VT) return N1; @@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Memoize node if it doesn't produce a flag. SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); @@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } +/// SplatByte - Distribute ByteVal over NumBits bits. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + /// getMemsetValue - Vectorized representation of the memset value /// operand. 
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = APInt(NumBits, C->getZExtValue() & 255); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } + APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); if (VT.isInteger()) return DAG.getConstant(Val, VT); return DAG.getConstantFP(APFloat(Val), VT); } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Value = DAG.getNode(ISD::OR, dl, VT, - DAG.getNode(ISD::SHL, dl, VT, Value, - DAG.getConstant(Shift, - TLI.getShiftAmountTy())), - Value); - Shift <<= 1; + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = SplatByte(NumBits, 0x01); + Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } return Value; @@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - else if (VT.getSimpleVT().SimpleTy == MVT::f32 || - VT.getSimpleVT().SimpleTy == MVT::f64) + else if (VT == MVT::f32 || VT == MVT::f64) return DAG.getConstantFP(0.0, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); } else @@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT.bitsGT(LVT)) VT = LVT; } - - // If we're optimizing for size, and there is a limit, bump the maximum number - // of operations inserted down to 4. This is a wild guess that approximates - // the size of a call to memcpy or memset (3 arguments + call). - if (Limit != ~0U) { - const Function *F = DAG.getMachineFunction().getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - Limit = 4; - } unsigned NumMemOps = 0; while (Size != 0) { @@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Src, uint64_t Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; // Expand memcpy to a series of load and store ops if the size operand falls // below a certain threshold. + // TODO: In the AlwaysInline case, if the size is big then generate a loop + // rather than maybe a humongous number of loads and stores. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); - + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), @@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, Align); + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); } else { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME does the case above also need this? EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, isVol, false, + SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, isVol, false, - Align); + DstPtrInfo.getWithOffset(DstOff), VT, isVol, + false, Align); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Src, uint64_t Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; @@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), @@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign); + SrcPtrInfo.getWithOffset(SrcOff), isVol, + false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, Align); + DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; } @@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff) { + MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; @@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool NonScalarIntSafe = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); - if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, NonScalarIntSafe, false, DAG, TLI)) return SDValue(); @@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; uint64_t DstOff = 0; unsigned NumMemOps = MemOps.size(); + + // Find the largest store and generate the bit pattern for it. + EVT LargestVT = MemOps[0]; + for (unsigned i = 1; i < NumMemOps; i++) + if (MemOps[i].bitsGT(LargestVT)) + LargestVT = MemOps[i]; + SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; - unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value = getMemsetValue(Src, VT, DAG, dl); + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
+ SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, 0); + DstPtrInfo.getWithOffset(DstOff), + isVol, false, Align); OutChains.push_back(Store); - DstOff += VTSize; + DstOff += VT.getSizeInBits() / 8; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),Align, - isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, - DstSV, DstSVOff, SrcSV, SrcSVOff); + DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Align, isVol, - true, DstSV, DstSVOff, SrcSV, SrcSVOff); + true, DstPtrInfo, SrcPtrInfo); } // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc @@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Align, isVol, - false, DstSV, DstSVOff, SrcSV, SrcSVOff); + false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. 
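The getMemsetStores rewrite above computes the splat pattern once for the widest store type and, when the target reports that truncation is free, derives the narrower store values from it instead of recomputing them. A standalone sketch of that reuse on plain integers; the truncation predicate here is a stand-in for TLI.isTruncateFree and the widths are only examples.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <cstdint>

// Stand-in for TLI.isTruncateFree(): on most targets a narrower integer is
// just the low bits of the wider one, so truncation costs nothing.
static bool isTruncateFree() { return true; }

int main() {
  const uint8_t Byte = 0x5A;
  // Pattern for the largest store type (here 64 bits), computed once.
  const uint64_t Largest = uint64_t(Byte) * 0x0101010101010101ULL;

  // Narrower stores reuse the wide pattern by truncation when that is free;
  // otherwise the pattern is regenerated at the narrower width.
  uint32_t Store32 = isTruncateFree() ? uint32_t(Largest)
                                      : uint32_t(Byte) * 0x01010101u;
  uint16_t Store16 = isTruncateFree() ? uint16_t(Largest)
                                      : uint16_t(uint16_t(Byte) * 0x0101u);

  assert(Store32 == 0x5A5A5A5Au);
  assert(Store16 == 0x5A5Au);
  return 0;
}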
SDValue Result = TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstSV, DstSVOff, SrcSV, SrcSVOff); + DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff) { + MachinePointerInfo DstPtrInfo) { // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. @@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, DstSV, DstSVOff); + Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. SDValue Result = TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstSV, DstSVOff); + DstPtrInfo); if (Result.getNode()) return Result; - // Emit a library call. + // Emit a library call. const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Cmp, - SDValue Swp, const Value* PtrVal, + SDValue Chain, SDValue Ptr, SDValue Cmp, + SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); - // Check if the memory reference references a frame index - if (!PtrVal) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrVal, Flags, 0, - MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); } @@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); - // Check if the memory reference references a frame index - if (!PtrVal) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrVal, Flags, 0, + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); @@ -3785,7 +3914,6 @@ SDValue 
SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, } /// getMergeValues - Create a MERGE_VALUES node from the given operands. -/// Allowed to return something different (and simpler) if Simplify is true. SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, DebugLoc dl) { if (NumOps == 1) @@ -3803,18 +3931,18 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - EVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, - MemVT, srcValue, SVOff, Align, Vol, + MemVT, PtrInfo, Align, Vol, ReadMem, WriteMem); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - EVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { if (Align == 0) // Ensure that codegen never sees alignment 0 @@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (Vol) Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(srcValue, Flags, SVOff, - MemVT.getStoreSize(), Align); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } @@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, EVT MemVT, MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); // Memoize the node unless it returns a flag. MemIntrinsicSDNode *N; - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(N, 0); } +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) + return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. + if (Ptr.getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Ptr.getOperand(1)) || + !isa<FrameIndexSDNode>(Ptr.getOperand(0))) + return MachinePointerInfo(); + + int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + return MachinePointerInfo::getFixedStack(FI, Offset+ + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 
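The InferPointerInfo helper introduced here (a second overload taking an SDValue offset follows below) recovers a fixed-stack MachinePointerInfo from a bare pointer operand when it is a FrameIndex node or a (FrameIndex + constant) add, so callers that pass no pointer info still get usable alias information. A much-simplified standalone model of that pattern matching; the struct and node shapes are illustrative only, not the real SelectionDAG types.

// --- standalone illustration, not part of the patch ---
#include <cassert>

// Minimal stand-ins for FrameIndexSDNode / ConstantSDNode / ISD::ADD.
struct ToyNode {
  enum Kind { FrameIndex, Constant, Add, Other } kind;
  long long value;            // frame index or constant value
  const ToyNode *ops[2];      // operands (used only by Add)
};

struct ToyPointerInfo {
  bool isFixedStack;
  int frameIndex;
  long long offset;
};

static ToyPointerInfo inferPointerInfo(const ToyNode *Ptr, long long Off = 0) {
  // FI + Off: model it directly.
  if (Ptr->kind == ToyNode::FrameIndex)
    return {true, int(Ptr->value), Off};
  // (FI + C) + Off: fold the constant into the offset.
  if (Ptr->kind == ToyNode::Add &&
      Ptr->ops[0] && Ptr->ops[0]->kind == ToyNode::FrameIndex &&
      Ptr->ops[1] && Ptr->ops[1]->kind == ToyNode::Constant)
    return {true, int(Ptr->ops[0]->value), Off + Ptr->ops[1]->value};
  // Anything else: unknown pointer, no info.
  return {};
}

int main() {
  ToyNode FI{ToyNode::FrameIndex, 3, {nullptr, nullptr}};
  ToyNode C8{ToyNode::Constant, 8, {nullptr, nullptr}};
  ToyNode AddNode{ToyNode::Add, 0, {&FI, &C8}};

  ToyPointerInfo PI = inferPointerInfo(&AddNode, 4);
  assert(PI.isFixedStack && PI.frameIndex == 3 && PI.offset == 12);
  assert(!inferPointerInfo(&C8).isFixedStack);
  return 0;
}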
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { + // If the 'Offset' value isn't a constant, we can't handle this. + if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) + return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + if (OffsetOp.getOpcode() == ISD::UNDEF) + return InferPointerInfo(Ptr); + return MachinePointerInfo(); +} + + SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, EVT MemVT, + MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr, Offset); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, - MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + TBAAInfo); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { @@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - const Value *SV, int SVOffset, + MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); + PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, - const Value *SV, - int SVOffset, EVT MemVT, + MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); + PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); } + SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { @@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already a indexed load!"); return getLoad(AM, 
LD->getExtensionType(), OrigLoad.getValueType(), dl, - LD->getChain(), Base, Offset, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->getChain(), Base, Offset, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, - SDValue Ptr, const Value *SV, int SVOffset, + SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, - Val.getValueType().getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, + Val.getValueType().getStoreSize(), Alignment, + TBAAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, } SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, - SDValue Ptr, const Value *SV, - int SVOffset, EVT SVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + SDValue Ptr, MachinePointerInfo PtrInfo, + EVT SVT,bool isVolatile, bool isNonTemporal, + unsigned Alignment, + const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, + TBAAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; @@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
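The trailing comment above ("Memoize the node unless it returns a flag") names a pattern this file repeats in several of the hunks here: unless the node produces glue, build a FoldingSetNodeID from the opcode, value types and operands, and return the existing node if one is already in the CSE map. A toy sketch of that uniquing discipline, using std::map as a stand-in for the FoldingSet; all names below are illustrative and leak-freedom is ignored for brevity.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <map>
#include <utility>
#include <vector>

struct ToyNode { unsigned Opcode; std::vector<int> Operands; };

// CSE map keyed by everything that identifies a node.
using NodeKey = std::pair<unsigned, std::vector<int>>;
static std::map<NodeKey, ToyNode*> CSEMap;

// getNode-style factory: reuse an equivalent node when one already exists,
// mirroring "if (VT != MVT::Glue) { build ID; FindNodeOrInsertPos; }".
static ToyNode *getNode(unsigned Opcode, std::vector<int> Operands,
                        bool ProducesGlue = false) {
  NodeKey Key(Opcode, Operands);
  if (!ProducesGlue) {
    auto It = CSEMap.find(Key);
    if (It != CSEMap.end())
      return It->second;                 // structurally identical node exists
  }
  ToyNode *N = new ToyNode{Opcode, std::move(Operands)};
  if (!ProducesGlue)
    CSEMap[Key] = N;                     // glue-producing nodes stay unique
  return N;
}

int main() {
  ToyNode *A = getNode(1, {10, 20});
  ToyNode *B = getNode(1, {10, 20});
  ToyNode *C = getNode(1, {10, 20}, /*ProducesGlue=*/true);
  assert(A == B && A != C);
  return 0;
}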
SDNode *N; - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, } AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, unsigned NumOps) { // If an identical node already exists, use it. void *IP = 0; - if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) @@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, const SDValue *Ops, unsigned NumOps) { - bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; - void *IP; + void *IP = 0; if (DoCSE) { FoldingSetNodeID ID; @@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifyMachineNode(N); #endif return N; } @@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, const SDValue *Ops, unsigned NumOps) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { SD->setHasDebugValue(true); } +/// TransferDbgValues - Transfer SDDbgValues. 
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { + if (From == To || !From.getNode()->getHasDebugValue()) + return; + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); + I != E; ++I) { + SDDbgValue *Dbg = *I; + if (Dbg->getKind() == SDDbgValue::SDNODE) { + SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->getOffset(), Dbg->getDebugLoc(), + Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } + } + for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + E = ClonedDVs.end(); I != E; ++I) + AddDbgValue(*I, ToNode, false); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// @@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, unsigned NumOps, EVT memvt, + const SDValue *Ops, unsigned NumOps, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { @@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { namespace { struct EVTArray { std::vector<EVT> VTs; - + EVTArray() { VTs.reserve(MVT::LAST_VALUETYPE); for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) @@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) { sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { - assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && "Value type out of range!"); return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } @@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const { /// reachesChainWithoutSideEffects - Return true if this operand (which must /// be a chain) reaches the specified operand without crossing any -/// side-effecting instructions. In practice, this looks through token -/// factors and non-volatile loads. In order to remain efficient, this only -/// looks a couple of nodes in, it does not do an exhaustive search. +/// side-effecting instructions on any chain path. In practice, this looks +/// through token factors and non-volatile loads. In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search. bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth) const { if (*this == Dest) return true; @@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, if (Depth == 0) return false; // If this is a token factor, all inputs to the TF happen in parallel. If any - // of the operands of the TF reach dest, then we can do the xform. + // of the operands of the TF does not reach dest, then we cannot do the xform. if (getOpcode() == ISD::TokenFactor) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) - return true; - return false; + if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return false; + return true; } // Loads don't have side effects, look through them. 
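The reachesChainWithoutSideEffects change above flips a quantifier: for a TokenFactor, every incoming chain path must now reach the destination without side effects, where the old code accepted the node as soon as any one path did. A toy model of why the "all" form is the safe one; the node shape is illustrative, not the real SDNode.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <vector>

struct ToyChainNode {
  bool IsTokenFactor;
  bool HasSideEffect;                        // e.g. a store
  std::vector<const ToyChainNode*> Operands; // incoming chain operands
};

static bool reachesWithoutSideEffects(const ToyChainNode *N,
                                      const ToyChainNode *Dest,
                                      unsigned Depth = 2) {
  if (N == Dest) return true;
  if (Depth == 0) return false;
  if (N->IsTokenFactor) {
    // All inputs to a TokenFactor happen in parallel: one side-effecting
    // path is enough to spoil the transformation, so require them all.
    for (const ToyChainNode *Op : N->Operands)
      if (!reachesWithoutSideEffects(Op, Dest, Depth - 1))
        return false;
    return true;
  }
  if (N->HasSideEffect) return false;
  return !N->Operands.empty() &&
         reachesWithoutSideEffects(N->Operands[0], Dest, Depth - 1);
}

int main() {
  ToyChainNode Dest{false, false, {}};
  ToyChainNode Store{false, true, {&Dest}};       // side-effecting path
  ToyChainNode TF{true, false, {&Dest, &Store}};  // parallel chains
  // The old "any operand reaches Dest" rule would have accepted TF here.
  assert(!reachesWithoutSideEffects(&TF, &Dest));
  return 0;
}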
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; case ISD::ConstantPool: return "ConstantPool"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; @@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; @@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BIT_CONVERT: return "bit_convert"; + case ISD::BITCAST: return "bit_convert"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - if (G && R->getReg() && - TargetRegisterInfo::isPhysicalRegister(R->getReg())) { - OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg()); - } else { - OS << " %reg" << R->getReg(); - } + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) OS << ", " << AM; - + OS << ">"; } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { OS << "<" << *M->getMemOperand() << ">"; @@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, const SelectionDAG *G, unsigned depth, - unsigned indent) + unsigned indent) { if (depth == 0) return; @@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, unsigned depth) const { printrWithDepthHelper(OS, this, G, depth, 0); -} +} void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. @@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. 
dumprWithDepth(G, 100); -} +} static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) @@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a -/// location that is 'Dist' units away from the location that the 'Base' load +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load /// is loading from. -bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const { if (LD->getChain() != Base->getChain()) return false; @@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); - if (V && (V->getSExtValue() == Dist*Bytes)) - return true; - } + + // Handle X+C + if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; const GlobalValue *GV1 = NULL; const GlobalValue *GV2 = NULL; @@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t FrameOffset = 0; if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Ptr.getOperand(1)) && + } else if (isBaseWithConstantOffset(Ptr) && isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + // Handle FI+Cst FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); FrameOffset = Ptr.getConstantOperandVal(1); } if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); @@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, if (OpVal.getOpcode() == ISD::UNDEF) SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). zextOrTrunc(sz) << BitPos; else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; @@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, while (sz > 8) { unsigned HalfSize = sz / 2; - APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); - APInt LowValue = APInt(SplatValue).trunc(HalfSize); - APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); - APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatValue.trunc(HalfSize); + APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = SplatUndef.trunc(HalfSize); // If the two halves do not match (ignoring undef bits), stop here. 
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || @@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N, // If this node has already been checked, don't check it again. if (Checked.count(N)) return; - + // If a node has already been visited on this depth-first walk, reject it as // a cycle. if (!Visited.insert(N)) { @@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N, errs() << "Detected cycle in SelectionDAG\n"; abort(); } - + for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); - + Checked.insert(N); Visited.erase(N); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e65744592c8b..452f5614b7bf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" @@ -43,9 +44,8 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" @@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +// Limit the width of DAG chains. This is important in general to prevent +// prevent DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. Limiting DAG width is +// the safe approach, and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. It easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static cl::opt<unsigned> +MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), + cl::init(64), cl::Hidden); + static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT); - + /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra @@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); - + assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; @@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { - Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } if (TLI.isBigEndian()) @@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); @@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, } if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); @@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; - + // Handle a multi-element vector. if (NumParts > 1) { EVT IntermediateVT, RegisterVT; @@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT == Parts[0].getValueType() && "Part type doesn't match part!"); - + // Assemble the parts into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); if (NumIntermediates == NumParts) { @@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT, IntermediateVT); } - + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, ValueVT, &Ops[0], NumIntermediates); } - + // There is now one part, held in Val. Correct it to match ValueVT. PartVT = Val.getValueType(); - + if (PartVT == ValueVT) return Val; - + if (PartVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a @@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getIntPtrConstant(0)); - } - + } + // Vector/Vector bitcast. 
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } - + assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); @@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT); - + /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. @@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); - + // Handle the vector case separately. if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); - + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { assert(PartVT.isInteger() && ValueVT.isInteger() && - "Unknown mismatch!"); + "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. assert(NumParts == 1 && PartVT != ValueVT); - Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. assert(PartVT.isInteger() && ValueVT.isInteger() && @@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. - Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + Parts[0] = DAG.getNode(ISD::BITCAST, DL, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); @@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, ThisVT, Part0, DAG.getIntPtrConstant(0)); if (ThisBits == PartBits && ThisVT != PartVT) { - Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); - Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); + Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); } } } @@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - + if (NumParts == 1) { if (PartVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. 
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { @@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, DAG.getIntPtrConstant(i))); - + for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); @@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); // FIXME: Use CONCAT for 2x -> 4x. - + //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else { @@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); } - + Parts[0] = Val; return; } - + // Handle a multi-element vector. EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; @@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); - + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, DAG.getIntPtrConstant(i)); } - + // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, @@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Chain = P.getValue(1); + Parts[i] = P; // If the source register was virtual and if we know something about it, // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. 
- bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } + if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + !RegisterVT.isInteger() || RegisterVT.isVector() || + !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) + continue; + + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[Regs[Part+i]]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + else + continue; - Parts[i] = P; + // Add an assertion node. + assert(FromVT != MVT::Other); + Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), @@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, Val.getResNo(), Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } - } else { - SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); - } + } else + DEBUG(dbgs() << "Dropping debug info for " << DI); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + resolveDanglingDebugInfo(V, N); + return N; } // Otherwise create a new SDValue and remember it. 
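The reworked getCopyFromRegs loop above picks the tightest AssertSext or AssertZext it can justify from the number of known sign bits or known leading zero bits of the live-out register. A standalone sketch of that mapping for a 32-bit register; the return values simply name the narrow integer width the assertion would use and are illustrative only.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <utility>

// Given what is known about a 32-bit live-out value, return the kind of
// assertion ("sext"/"zext"/"none") and the narrowest width it holds from.
static std::pair<const char*, unsigned>
tightestAssert(unsigned NumSignBits, unsigned NumZeroBits) {
  const unsigned RegSize = 32;
  if (NumSignBits == RegSize)      return {"sext", 1};
  if (NumZeroBits >= RegSize - 1)  return {"zext", 1};
  if (NumSignBits > RegSize - 8)   return {"sext", 8};
  if (NumZeroBits >= RegSize - 8)  return {"zext", 8};
  if (NumSignBits > RegSize - 16)  return {"sext", 16};
  if (NumZeroBits >= RegSize - 16) return {"zext", 16};
  // (the real code also checks a 32-bit case, relevant for wider registers)
  return {"none", 0};
}

int main() {
  // A value with 25 known sign bits fits in an i8 after sign extension.
  assert(tightestAssert(25, 0).second == 8);
  // 24 known leading zeros mean the value zero-extends from an i8.
  assert(tightestAssert(1, 24).second == 8);
  // Nothing useful known: no assertion is added and the loop continues.
  assert(tightestAssert(1, 0).second == 0);
  return 0;
}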
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, false, 0); + // FIXME: better loc info would be nice. + Add, MachinePointerInfo(), false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } - + return true; } @@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. // For example, instead of something like: // cmp A, B // C = seteq @@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (BOp->hasOneUse() && + if (!TLI.isJumpExpensive() && + BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, @@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // Insert the false branch. - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); + // Insert the false branch. Do this even if it's a fall through branch, + // this makes it easier to do DAG optimizations which require inverting + // the branch condition. + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); - SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), - TLI.getPointerTy()); + // Determine the type of the test operands. + bool UsePtrType = false; + if (!TLI.isTypeLegal(VT)) + UsePtrType = true; + else { + for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) + if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + UsePtrType = true; + break; + } + } + if (UsePtrType) { + VT = TLI.getPointerTy(); + Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); + } - B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); + B.RegVT = VT; + B.Reg = FuncInfo.CreateReg(VT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), - B.Reg, ShiftOp); + B.Reg, Sub); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
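Two details of the bit-test switch lowering in the surrounding hunks are easy to misread. First, the UsePtrType loop above decides whether a case mask still fits in the switch value's type: shifting the mask right by the type's bit width must yield 0 or -1, which is exactly what the "+ 1 >= 2" comparison rejects. Second, in the visitBitTestCase hunk that follows, a single-bit mask is tested by comparing the shift amount against the mask's trailing-zero count instead of materializing the shifted bit. A standalone check of both facts (uses the GCC/Clang __builtin_ctzll intrinsic):

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <cstdint>

// Does Mask still fit in a value type of Bits bits?  After an arithmetic
// shift by Bits, a fitting mask collapses to 0 (unsigned) or -1 (signed),
// i.e. to an unsigned value of 0 or ~0, so "+ 1 >= 2" flags everything else.
static bool maskNeedsWiderType(int64_t Mask, unsigned Bits) {
  return (uint64_t)(Mask >> Bits) + 1 >= 2;
}

// For a mask with exactly one bit set, ((1 << X) & Mask) != 0 asks the same
// question as X == ctz(Mask), which is what the lowering now compares.
static bool singleBitTest(uint64_t Mask, unsigned X) {
  return X == (unsigned)__builtin_ctzll(Mask);
}

int main() {
  assert(!maskNeedsWiderType(0x00000000000000F0LL, 32)); // fits in i32
  assert( maskNeedsWiderType(0x0000000100000000LL, 32)); // needs ptr type
  assert(!maskNeedsWiderType(-16, 32));                  // sign-extends fine

  const uint64_t Mask = 1ULL << 13;
  for (unsigned X = 0; X != 64; ++X)
    assert(singleBitTest(Mask, X) == (((1ULL << X) & Mask) != 0));
  return 0;
}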
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } /// visitBitTestCase - this function produces one "bit test" -void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, - TLI.getPointerTy()); + EVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + Reg, VT); SDValue Cmp; if (CountPopulation_64(B.Mask) == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(ShiftOp.getValueType()), + TLI.getSetCCResultType(VT), ShiftOp, - DAG.getConstant(CountTrailingZeros_64(B.Mask), - TLI.getPointerTy()), + DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + DAG.getConstant(1, VT), ShiftOp); // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); + VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), + TLI.getSetCCResultType(VT), + AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - // TODO: If any two of the cases has the same destination, and if one value + // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 cases. + // TODO: Handle cases where CR.CaseBB != SwitchBB. + if (Size == 2 && CR.CaseBB == SwitchBB) { + Case &Small = *CR.Range.first; + Case &Big = *(CR.Range.second-1); + + if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { + const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); + const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + + // Check that there is only one bit different. + if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && + (SmallValue | BigValue) == BigValue) { + // Isolate the common bit. + APInt CommonBit = BigValue & ~SmallValue; + assert((SmallValue | CommonBit) == BigValue && + CommonBit.countPopulation() == 1 && "Not a common bit?"); + + SDValue CondLHS = getValue(SV); + EVT VT = CondLHS.getValueType(); + DebugLoc DL = getCurDebugLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, VT)); + SDValue Cond = DAG.getSetCC(DL, MVT::i1, + Or, DAG.getConstant(BigValue, VT), + ISD::SETEQ); + + // Update successor info. + SwitchBB->addSuccessor(Small.BB); + SwitchBB->addSuccessor(Default); + + // Insert the true branch. 
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, + getControlRoot(), Cond, + DAG.getBasicBlock(Small.BB)); + + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(Default)); + + DAG.setRoot(BrCond); + return true; + } + } + } // Rearrange the case blocks so that the last one falls through if possible. if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { @@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { } static APInt ComputeRange(const APInt &First, const APInt &Last) { - APInt LastExt(Last), FirstExt(First); uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, } BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, (CR.CaseBB == SwitchBB), + -1U, MVT::Other, (CR.CaseBB == SwitchBB), CR.CaseBB, Default, BTC); if (CR.CaseBB == SwitchBB) @@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); MachineBasicBlock* nextBB = J->BB; @@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, return numCmps; } +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, + MachineBasicBlock *Last) { + // Update JTCases. + for (unsigned i = 0, e = JTCases.size(); i != e; ++i) + if (JTCases[i].first.HeaderBB == First) + JTCases[i].first.HeaderBB = Last; + + // Update BitTestCases. 
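// A minimal standalone sketch of the two-case merge performed above in
// handleSmallSwitchRange: two switch cases that share a destination and whose
// values differ in exactly one bit can be folded into a single compare, e.g.
// "if (X == 6 || X == 4)" -> "if ((X | 2) == 6)". This assumes only standard
// C++; the helper name canMergeCases is invented for illustration and is not
// part of the patch.
#include <bitset>
#include <cstdint>

// Mirrors the popcount/subset precondition checked in the patch and isolates
// the single differing bit that the OR sets.
static bool canMergeCases(uint64_t Small, uint64_t Big, uint64_t &CommonBit) {
  if (std::bitset<64>(Big).count() != std::bitset<64>(Small).count() + 1 ||
      (Small | Big) != Big)
    return false;
  CommonBit = Big & ~Small;
  return true;
}

int main() {
  uint64_t CommonBit = 0;
  bool Mergeable = canMergeCases(4, 6, CommonBit); // CommonBit becomes 2
  // Brute-force check of (X == 4 || X == 6) <=> ((X | CommonBit) == 6).
  bool Equivalent = true;
  for (uint64_t X = 0; X < 256 && Mergeable; ++X)
    Equivalent = Equivalent && ((X == 4 || X == 6) == ((X | CommonBit) == 6));
  return (Mergeable && Equivalent) ? 0 : 1;
}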
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) + if (BitTestCases[i].Parent == First) + BitTestCases[i].Parent = Last; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; @@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); - if (Ty->isVectorTy()) { - if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { - const VectorType *DestTy = cast<VectorType>(I.getType()); - const Type *ElTy = DestTy->getElementType(); - unsigned VL = DestTy->getNumElements(); - std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); - Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); - if (CV == CNZ) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - } + if (isa<Constant>(I.getOperand(0)) && + I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; } - if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) - if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - visitBinary(I, ISD::FSUB); } @@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - if (!I.getType()->isVectorTy() && - Op2.getValueType() != TLI.getShiftAmountTy()) { + + MVT ShiftTy = TLI.getShiftAmountTy(); + + // Coerce the shift amount to the right type if we can. + if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { + unsigned ShiftSize = ShiftTy.getSizeInBits(); + unsigned Op2Size = Op2.getValueType().getSizeInBits(); + DebugLoc DL = getCurDebugLoc(); + // If the operand is smaller than the shift count type, promote it. - EVT PTy = TLI.getPointerTy(); - EVT STy = TLI.getShiftAmountTy(); - if (STy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); + if (ShiftSize > Op2Size) + Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); + // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (STy.getSizeInBits() >= - Log2_32_Ceil(Op2.getValueType().getSizeInBits())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); - // Otherwise we'll need to temporarily settle for some other - // convenient type; type legalization will make adjustments as - // needed. - else if (PTy.bitsLT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), Op2); - else if (PTy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), Op2); + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); + // Otherwise we'll need to temporarily settle for some other convenient + // type. 
Type legalization will make adjustments once the shiftee is split. + else + Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), @@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is - // either a BIT_CONVERT or a no-op. + // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), DestVT, N)); // convert types. else setValue(&I, N); // noop cast. @@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } else { StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts < SrcNumElts) + StartIdx[Input] + MaskNumElts <= SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } } @@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile()) + if (I.isVolatile() || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(SV)) { + else if (AA->pointsToConstantMemory( + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. 
Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); - for (unsigned i = 0; i != NumValues; ++i) { + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). + if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, - isNonTemporal, Alignment); + A, MachinePointerInfo(SV, Offsets[i]), isVolatile, + isNonTemporal, Alignment, TBAAInfo); Values[i] = L; - Chains[i] = L.getValue(1); + Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, &Chains[0], ChainI); if (isVolatile) DAG.setRoot(Chain); else @@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector<SDValue, 4> Chains(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); - - for (unsigned i = 0; i != NumValues; ++i) { + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. 
+ if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - Chains[i] = DAG.getStore(Root, getCurDebugLoc(), - SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, - isNonTemporal, Alignment); - } - - DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues)); + SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + Add, MachinePointerInfo(PtrV, Offsets[i]), + isVolatile, isNonTemporal, Alignment, TBAAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + ++SDNodeOrder; + AssignOrderingToNode(StoreNode.getNode()); + DAG.setRoot(StoreNode); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic) + if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || + Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. @@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), VTs, &Ops[0], Ops.size(), - Info.memVT, Info.ptrVal, Info.offset, + Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { @@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); - Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); } setValue(&I, Result); @@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } /// GetExponent - Get the exponent: @@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5); // Add the exponent into the result in integer domain. 
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7); // Add the exponent into the result in integer domain. SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); // Add the exponent into the result in integer domain. SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14); } } else { // No special expansion. @@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); @@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. 
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: @@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: @@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } } else { @@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: @@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: @@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, 
MVT::i32, t13); + SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } } else { @@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, + int64_t Offset, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) @@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. // Use this info directly. - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); Reg = TRI->getFrameRegister(MF); Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + // If byval argument ofset is not recorded then ignore this. + if (!Offset) + Reg = 0; } if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, } if (!Reg) { + // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI == FuncInfo.ValueMap.end()) - return false; - Reg = VMI->second; + if (VMI != FuncInfo.ValueMap.end()) + Reg = VMI->second; } - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + if (!Reg && N.getNode()) { + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { + Reg = TRI->getFrameRegister(MF); + Offset = FINode->getIndex(); + } + } + + if (!Reg) + return false; + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); @@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, } // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif /// visitIntrinsicCall - Lower the call to the specified intrinsic function. 
If @@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::memset: { @@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0)); + MachinePointerInfo(I.getArgOperand(0)))); return 0; } case Intrinsic::memmove: { @@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - - // If the source and destination are known to not be aliases, we can - // lower memmove as memcpy. - uint64_t Size = -1ULL; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) - Size = C->getZExtValue(); - if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == - AliasAnalysis::NoAlias) { - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getArgOperand(0), 0, - I.getArgOperand(1), 0)); - return 0; - } - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::dbg_declare: { @@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - SDDbgValue*SDV = - DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; } @@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgValue *SDV; if (N.getNode()) { // Parameters are handled specially. - bool isParameter = + bool isParameter = DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); @@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Byval parameter. We have a frame index at this point. SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); - else + else { // Can't do anything with other non-AI cases yet. This might be a // parameter of a callee function that got inlined, for example. + DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; + } } else if (AI) SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), 0, dl, SDNodeOrder); - else + else { // Can't do anything with other non-AI cases yet. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; + } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { - // If Address is an arugment then try to emits its dbg value using - // virtual register info from the FuncInfo.ValueMap. Otherwise add undef - // to help track missing debug info. + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { - SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + // If variable is pinned by a alloca in dominating bb then + // use StaticAllocaMap. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + if (AI->getParent() != DI.getParent()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + SDV = DAG.getDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + } + } + DEBUG(dbgs() << "Dropping debug info for " << DI); } } return 0; @@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else if (isa<PHINode>(V) && !V->use_empty() ) { + } else if (!V->use_empty() ) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later. DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); DanglingDebugInfoMap[V] = DDI; } else { // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter; we need this fallback. - SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + // data available is an unreferenced parameter. + DEBUG(dbgs() << "Dropping debug info for " << DI); } } @@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. 
int FI = SI->second; - + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); @@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, - getRoot(), - getValue(I.getArgOperand(0)))); + getRoot(), getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::eh_sjlj_dispatch_setup: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, + getRoot(), getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDValue ShAmt = getValue(I.getArgOperand(1)); + if (isa<ConstantSDNode>(ShAmt)) { + visitTargetIntrinsic(I, Intrinsic); + return 0; + } + unsigned NewIntrinsic = 0; + EVT ShAmtVT = MVT::v2i32; + switch (Intrinsic) { + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + // We must do this early because v2i32 is not a legal type. + DebugLoc dl = getCurDebugLoc(); + SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI.getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(NewIntrinsic, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. 
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, - PseudoSourceValue::getFixedStack(FI), - 0, true, false, 0); + MachinePointerInfo::getFixedStack(FI), + true, false, 0); setValue(&I, Res); DAG.setRoot(Res); return 0; @@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; + unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); - DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.getVTList(MVT::Other), + &Ops[0], 4, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + false, /* volatile */ + rw==0, /* read */ + rw==1)); /* write */ return 0; } - case Intrinsic::memory_barrier: { SDValue Ops[6]; Ops[0] = getRoot(); @@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), - I.getArgOperand(0)); + MachinePointerInfo(I.getArgOperand(0))); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; + int DemoteStackIdx = -100; if (!CanLowerReturn) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( @@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, false, 1); + Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), + false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); - + // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. 
SmallVector<EVT, 4> RetTys; @@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, EVT VT = RetTys[I]; EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - + SDValue ReturnValue = getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); @@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, - Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, false /*nontemporal*/, 1 /* align=1 */); @@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { visitInlineAsm(&I); return; } - + + // See if any floating point values are being passed to this function. This is + // used to emit an undefined reference to fltused on Windows. + const FunctionType *FT = + cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (FT->isVarArg() && + !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + const Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<const Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (!i->isFloatingPointTy()) continue; + MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); + break; + } + } + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { @@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } } - + SDValue Callee; if (!RenameFn) Callee = getValue(I.getCalledValue()); @@ -5008,7 +5226,7 @@ public: /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; - explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) + explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } @@ -5083,6 +5301,8 @@ private: } }; +typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; + } // end llvm namespace. /// isAllocatableRegister - If the specified register is safe to allocate, @@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. 
- std::vector<SDISelAsmOperandInfo> ConstraintOperands; + SDISelAsmOperandInfoVector ConstraintOperands; std::set<unsigned> OutputRegs, InputRegs; - // Do a prepass over the constraints, canonicalizing them, and building up the - // ConstraintOperands list. - std::vector<InlineAsm::ConstraintInfo> - ConstraintInfos = IA->ParseConstraints(); - - bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); - - SDValue Chain, Flag; - - // We won't need to flush pending loads if this asm doesn't touch - // memory and is nonvolatile. - if (hasMemory || IA->hasSideEffects()) - Chain = getRoot(); - else - Chain = DAG.getRoot(); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); EVT OpVT = MVT::Other; @@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { - // Strip bitcasts, if any. This mostly comes up for functions. - OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { @@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.ConstraintVT = OpVT; + + // Indirect operand accesses access memory. + if (OpInfo.isIndirect) + hasMemory = true; + else { + for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + if (CType == TargetLowering::C_Memory) { + hasMemory = true; + break; + } + } + } } + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the @@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to to provide an address for the memory input. 
if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert(OpInfo.Type == InlineAsm::isInput && + assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have @@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0, + OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } - ConstraintInfos.clear(); - // Second pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { @@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the AlignStack bit as operand 3. - AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, - MVT::i1)); + // Remember the HasSideEffect and AlignStack bits as operand 3. + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { " don't know how to handle tied " "indirect register inputs"); } - + RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); @@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG, AsmNodeOperands); break; } - + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && "Unexpected number of operands"); @@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. 
- if (OpInfo.ConstraintType == TargetLowering::C_Other && - OpInfo.isIndirect) + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } - + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && @@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), - DAG.getVTList(MVT::Other, MVT::Flag), + DAG.getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); @@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { - Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && @@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0, + MachinePointerInfo(StoresToEmit[i].second), false, false, 0); OutChains.push_back(Val); } @@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; + MyFlags.VT = RegisterVT.getSimpleVT(); MyFlags.Used = isReturnValueUsed; if (RetSExt) MyFlags.Flags.setSExt(); @@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); @@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); @@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Note down frame index for byval arguments. 
if (I->hasByValAttr() && !ArgValues.empty()) - if (FrameIndexSDNode *FI = + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5f400e9c83ac..a1a70c394a51 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -258,15 +258,16 @@ private: struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, bool E, + unsigned Rg, EVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): - First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), + First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), Parent(P), Default(D), Cases(C) { } APInt First; APInt Range; const Value *SValue; unsigned Reg; + EVT RegVT; bool Emitted; MachineBasicBlock *Parent; MachineBasicBlock *Default; @@ -347,7 +348,7 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - + void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); @@ -398,6 +399,10 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the + /// references that ned to refer to the last resulting block. + void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + private: // Terminator instructions. void visitRet(const ReturnInst &I); @@ -431,7 +436,8 @@ public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); - void visitBitTestCase(MachineBasicBlock* NextMBB, + void visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 66cb5ceb09e5..62ebc81ef86e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -43,6 +43,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -53,8 +54,17 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +STATISTIC(NumBBWithOutOfOrderLineInfo, + "Number of blocks with out of order line number info"); +STATISTIC(NumMBBWithOutOfOrderLineInfo, + "Number of machine blocks with out of order line number info"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const 
TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, + CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), - DAGSize(0) -{} + DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + } SelectionDAGISel::~SelectionDAGISel() { delete SDB; @@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { static bool FunctionCallsSetJmp(const Function *F) { const Module *M = F->getParent(); static const char *ReturnsTwiceFns[] = { + "_setjmp", "setjmp", "sigsetjmp", "setjmp_syscall", @@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) { #undef NUM_RETURNS_TWICE_FNS } +/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it. In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. +/// +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { + // Loop for blocks with phi nodes. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + PHINode *PN = dyn_cast<PHINode>(BB->begin()); + if (PN == 0) continue; + + ReprocessBlock: + // For each block with a PHI node, check to see if any of the input values + // are potentially trapping constant expressions. Constant expressions are + // the only potentially trapping value that can occur as the argument to a + // PHI. + for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); + if (CE == 0 || !CE->canTrap()) continue; + + // The only case we have to worry about is when the edge is critical. + // Since this block has a PHI Node, we assume it has multiple input + // edges: check to see if the pred has multiple successors. + BasicBlock *Pred = PN->getIncomingBlock(i); + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // Okay, we have to split this edge. + SplitCriticalEdge(Pred->getTerminator(), + GetSuccessorNumber(Pred, BB), SDISel, true); + goto ReprocessBlock; + } + } +} + bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && @@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF); SDB->init(GFI, *AA); @@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) + if (LI->second) LiveInMap.insert(std::make_pair(LI->first, LI->second)); // Insert DBG_VALUE instructions for function arguments to the entry block. 
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (LDI != LiveInMap.end()) { MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; - const MDNode *Variable = + const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); unsigned Offset = MI->getOperand(1).getImm(); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), + BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addReg(LDI->second, RegState::Debug) .addImm(Offset).addMetadata(Variable); + + // If this vreg is directly copied into an exported register then + // that COPY instructions also need DBG_VALUE, if it is the only + // user of LDI->second. + MachineInstr *CopyUseMI = NULL; + for (MachineRegisterInfo::use_iterator + UI = RegInfo->use_begin(LDI->second); + MachineInstr *UseMI = UI.skipInstruction();) { + if (UseMI->isDebugValue()) continue; + if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { + CopyUseMI = UseMI; continue; + } + // Otherwise this is another use or second copy use. + CopyUseMI = NULL; break; + } + if (CopyUseMI) { + MachineInstr *NewMI = + BuildMI(*MF, CopyUseMI->getDebugLoc(), + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) + .addImm(Offset).addMetadata(Variable); + EntryMBB->insertAfter(CopyUseMI, NewMI); + } } } @@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); - // Operand 1 of an inline asm instruction indicates whether the asm - // needs stack or not. - if ((II->isInlineAsm() && II->getOperand(1).getImm()) || - (TID.isCall() && !TID.isReturn())) { + if ((TID.isCall() && !TID.isReturn()) || + II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; } @@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); + return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { // Only install this information if it tells us something. if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { - DestReg -= TargetRegisterInfo::FirstVirtualRegister; - if (DestReg >= FuncInfo->LiveOutRegInfo.size()) - FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FuncInfo->LiveOutRegInfo.grow(DestReg); FunctionLoweringInfo::LiveOutInfo &LOI = FuncInfo->LiveOutRegInfo[DestReg]; LOI.NumSignBits = NumSignBits; @@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. + MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - FuncInfo->MBB = Scheduler->EmitSchedule(); + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); FuncInfo->InsertPt = Scheduler->InsertPos; } + // If the block was split, make sure we update any references that are used to + // update PHI nodes later on. + if (FirstMBB != LastMBB) + SDB->UpdateSplitBlock(FirstMBB, LastMBB); + // Free the scheduler state. 
{ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, @@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() { DEBUG(errs() << "===== Instruction selection begins:\n"); PreprocessISelDAG(); - + // Select target instructions for the DAG. { // Number all nodes with a topological order and set DAGSize. DAGSize = CurDAG->AssignTopologicalOrder(); - + // Create a dummy node (which is not added to allnodes), that adds // a reference to the root node, preventing it from being deleted, // and tracking any changes of the root. HandleSDNode Dummy(CurDAG->getRoot()); ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); ++ISelPosition; - + // The AllNodes list is now topological-sorted. Visit the // nodes by starting at the end of the list (the root of the // graph) and preceding back toward the beginning (the entry @@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() { // makes it theoretically possible to disable the DAGCombiner. if (Node->use_empty()) continue; - + SDNode *ResNode = Select(Node); - + // FIXME: This is pretty gross. 'Select' should be changed to not return // anything at all and this code should be nuked with a tactical strike. - + // If node should not be replaced, continue with the next one. if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. if (ResNode) ReplaceUses(Node, ResNode); - + // If after the replacement this node is not used any more, // remove this dead node. if (Node->use_empty()) { // Don't delete EntryToken, etc. @@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->RemoveDeadNode(Node, &ISU); } } - + CurDAG->setRoot(Dummy.getValue()); - } + } DEBUG(errs() << "===== Instruction selection ends:\n"); @@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() { } } + + + +bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + FastISel *FastIS) { + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) return false; + + // Figure out which vreg this is going into. + unsigned LoadReg = FastIS->getRegForValue(LI); + assert(LoadReg && "Load isn't already assigned a vreg? "); + + // Check to see what the uses of this vreg are. If it has no uses, or more + // than one use (at the machine instr level) then we can't fold it. + MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); + if (RI == RegInfo->reg_end()) + return false; + + // See if there is exactly one use of the vreg. If there are multiple uses, + // then the instruction got lowered to multiple machine instructions or the + // use of the loaded value ended up being multiple operands of the result, in + // either case, we can't fold this. + MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; + if (PostRI != RegInfo->reg_end()) + return false; + + assert(RI.getOperand().isUse() && + "The only use of the vreg must be a use, we haven't emitted the def!"); + + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes, make + // sure they get inserted in a logical place before the new instruction. + FuncInfo->InsertPt = User; + FuncInfo->MBB = User->getParent(); + + // Ask the target to try folding the load. + return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); +} + +#ifndef NDEBUG +/// CheckLineNumbers - Check if basic block instructions follow source order +/// or not. 
+static void CheckLineNumbers(const BasicBlock *BB) { + unsigned Line = 0; + unsigned Col = 0; + for (BasicBlock::const_iterator BI = BB->begin(), + BE = BB->end(); BI != BE; ++BI) { + const DebugLoc DL = BI->getDebugLoc(); + if (DL.isUnknown()) continue; + unsigned L = DL.getLine(); + unsigned C = DL.getCol(); + if (L < Line || (L == Line && C < Col)) { + ++NumBBWithOutOfOrderLineInfo; + return; + } + Line = L; + Col = C; + } +} + +/// CheckLineNumbers - Check if machine basic block instructions follow source +/// order or not. +static void CheckLineNumbers(const MachineBasicBlock *MBB) { + unsigned Line = 0; + unsigned Col = 0; + for (MachineBasicBlock::const_iterator MBI = MBB->begin(), + MBE = MBB->end(); MBI != MBE; ++MBI) { + const DebugLoc DL = MBI->getDebugLoc(); + if (DL.isUnknown()) continue; + unsigned L = DL.getLine(); + unsigned C = DL.getCol(); + if (L < Line || (L == Line && C < Col)) { + ++NumMBBWithOutOfOrderLineInfo; + return; + } + Line = L; + Col = C; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; +#ifndef NDEBUG + CheckLineNumbers(LLVMBB); +#endif FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); @@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); - + // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) + if (LLVMBB == &Fn.getEntryBlock()) { + for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end(); + DBI != DBE; ++DBI) { + if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) { + const DebugLoc DL = DI->getDebugLoc(); + SDB->setCurDebugLoc(DL); + break; + } + } LowerArguments(LLVMBB); + } // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { @@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(Inst)) + if (FastIS->SelectInstruction(Inst)) { + // If fast isel succeeded, check to see if there is a single-use + // non-volatile load right before the selected instruction, and see if + // the load is used by the instruction. If so, try to fold it. + const Instruction *BeforeInst = 0; + if (Inst != Begin) + BeforeInst = llvm::prior(llvm::prior(BI)); + if (BeforeInst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) + --BI; // If we succeeded, don't re-select the load. continue; + } // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { @@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } + if (Begin != BI) + ++NumDAGBlocks; + else + ++NumFastIselBlocks; + // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. 
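The two CheckLineNumbers helpers above implement the same monotonicity test at the IR and machine level: walk the block's instructions, skip unknown locations, and flag the block as soon as a known (line, column) pair moves backwards. A standalone sketch of that test follows, using plain (line, column) pairs with line 0 standing in for an unknown DebugLoc; the function name is hypothetical.

#include <cstddef>
#include <utility>
#include <vector>

// True if any known source location in the sequence moves backwards.
bool hasOutOfOrderLineInfo(const std::vector<std::pair<unsigned, unsigned> > &Locs) {
  unsigned Line = 0, Col = 0;
  for (std::size_t i = 0, e = Locs.size(); i != e; ++i) {
    unsigned L = Locs[i].first, C = Locs[i].second;
    if (L == 0)
      continue;                      // stand-in for DL.isUnknown()
    if (L < Line || (L == Line && C < Col))
      return true;                   // went backwards in source order
    Line = L;
    Col = C;
  }
  return false;
}

In the patch the helpers only bump the NumBBWithOutOfOrderLineInfo / NumMBBWithOutOfOrderLineInfo statistics and are compiled solely under #ifndef NDEBUG, so they add no cost to release builds.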
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } delete FastIS; +#ifndef NDEBUG + for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); + MBI != MBE; ++MBI) + CheckLineNumbers(MBI); +#endif } void @@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() { FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else - SDB->visitBitTestCase(SDB->BitTestCases[i].Default, + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. @@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() { if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) Succs.push_back(SDB->SwitchCases[i].FalseBB); - // Emit the code. Note that this could result in ThisBB being split, so - // we need to check for updates. + // Emit the code. Note that this could result in FuncInfo->MBB being split. SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); - ThisBB = FuncInfo->MBB; + + // Remember the last block, now that any splitting is done, for use in + // populating PHI nodes in successors. + MachineBasicBlock *ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can @@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { return Ctor(this, OptLevel); } -ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { - return new ScheduleHazardRecognizer(); -} - //===----------------------------------------------------------------------===// // Helper functions used by the generated instruction selector. //===----------------------------------------------------------------------===// @@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 + Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); - if (InOps[e-1].getValueType() == MVT::Flag) - --e; // Don't process a flag operand if it is here. + if (InOps[e-1].getValueType() == MVT::Glue) + --e; // Don't process a glue operand if it is here. while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); @@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } } - // Add the flag input back if present. + // Add the glue input back if present. 
if (e != InOps.size()) Ops.push_back(InOps.back()); } -/// findFlagUse - Return use of EVT::Flag value produced by the specified +/// findGlueUse - Return use of MVT::Glue value produced by the specified /// SDNode. /// -static SDNode *findFlagUse(SDNode *N) { +static SDNode *findGlueUse(SDNode *N) { unsigned FlagResNo = N->getNumValues()-1; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDUse &Use = I.getUse(); @@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // never find it. // // The Use may be -1 (unassigned) if it is a newly allocated node. This can - // happen because we scan down to newly selected nodes in the case of flag + // happen because we scan down to newly selected nodes in the case of glue // uses. if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) return false; - + // Don't revisit nodes if we already scanned it and didn't fail, we know we // won't fail if we scan it again. if (!Visited.insert(Use)) @@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // Ignore chain uses, they are validated by HandleMergeInputChains. if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains) continue; - + SDNode *N = Use->getOperand(i).getNode(); if (N == Def) { if (Use == ImmedUse || Use == Root) @@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // // * indicates nodes to be folded together. // - // If Root produces a flag, then it gets (even more) interesting. Since it - // will be "glued" together with its flag use in the scheduler, we need to + // If Root produces glue, then it gets (even more) interesting. Since it + // will be "glued" together with its glue use in the scheduler, we need to // check if it might reach N. // // [N*] // @@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // ^ / // // f / // // | / // - // [FU] // + // [GU] // // - // If FU (flag use) indirectly reaches N (the load), and Root folds N - // (call it Fold), then X is a predecessor of FU and a successor of - // Fold. But since Fold and FU are flagged together, this will create + // If GU (glue use) indirectly reaches N (the load), and Root folds N + // (call it Fold), then X is a predecessor of GU and a successor of + // Fold. But since Fold and GU are glued together, this will create // a cycle in the scheduling graph. - // If the node has flags, walk down the graph to the "lowest" node in the - // flagged set. + // If the node has glue, walk down the graph to the "lowest" node in the + // glueged set. EVT VT = Root->getValueType(Root->getNumValues()-1); - while (VT == MVT::Flag) { - SDNode *FU = findFlagUse(Root); - if (FU == NULL) + while (VT == MVT::Glue) { + SDNode *GU = findGlueUse(Root); + if (GU == NULL) break; - Root = FU; + Root = GU; VT = Root->getValueType(Root->getNumValues()-1); - - // If our query node has a flag result with a use, we've walked up it. If + + // If our query node has a glue result with a use, we've walked up it. If // the user (which has already been selected) has a chain or indirectly uses // the chain, our WalkChainUsers predicate will not consider it. Because of // this, we cannot ignore chains in this predicate. 
IgnoreChains = false; } - + SmallPtrSet<SDNode*, 16> Visited; return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); @@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); - + std::vector<EVT> VTs; VTs.push_back(MVT::Other); - VTs.push_back(MVT::Flag); + VTs.push_back(MVT::Glue); SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); @@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. - + unsigned Shift = 7; uint64_t NextBits; do { @@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { Val |= (NextBits&127) << Shift; Shift += 7; } while (NextBits & 128); - + return Val; } -/// UpdateChainsAndFlags - When a match is complete, this method updates uses of -/// interior flag and chain results to use the new flag and chain results. +/// UpdateChainsAndGlue - When a match is complete, this method updates uses of +/// interior glue and chain results to use the new glue and chain results. void SelectionDAGISel:: -UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode*> &ChainNodesMatched, - SDValue InputFlag, - const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, - bool isMorphNodeTo) { +UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputGlue, + const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, + bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; - + ISelUpdater ISU(ISelPosition); // Now that all the normal results are replaced, we replace the chain and - // flag results if present. + // glue results if present. if (!ChainNodesMatched.empty()) { assert(InputChain.getNode() != 0 && "Matched input chains but didn't produce a chain"); @@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; - + // If this node was already deleted, don't look at it. if (ChainNode->getOpcode() == ISD::DELETED_NODE) continue; - + // Don't replace the results of the root node if we're doing a // MorphNodeTo. if (ChainNode == NodeToMatch && isMorphNodeTo) continue; - + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); - if (ChainVal.getValueType() == MVT::Flag) + if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); - + // If the node became dead and we haven't already seen it, delete it. if (ChainNode->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } - - // If the result produces a flag, update any flag results in the matched - // pattern with the flag result. 
- if (InputFlag.getNode() != 0) { + + // If the result produces glue, update any glue results in the matched + // pattern with the glue result. + if (InputGlue.getNode() != 0) { // Handle any interior nodes explicitly marked. - for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { - SDNode *FRN = FlagResultNodesMatched[i]; - + for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = GlueResultNodesMatched[i]; + // If this node was already deleted, don't look at it. if (FRN->getOpcode() == ISD::DELETED_NODE) continue; - - assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag && - "Doesn't have a flag result"); + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && + "Doesn't have a glue result"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputFlag, &ISU); - + InputGlue, &ISU); + // If the node became dead and we haven't already seen it, delete it. if (FRN->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) NowDeadNodes.push_back(FRN); } } - + if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); - + DEBUG(errs() << "ISEL: Match complete!\n"); } @@ -1392,17 +1589,17 @@ enum ChainResult { /// /// The walk we do here is guaranteed to be small because we quickly get down to /// already selected nodes "below" us. -static ChainResult +static ChainResult WalkChainUsers(SDNode *ChainedNode, SmallVectorImpl<SDNode*> &ChainedNodesInPattern, SmallVectorImpl<SDNode*> &InteriorChainedNodes) { ChainResult Result = CR_Simple; - + for (SDNode::use_iterator UI = ChainedNode->use_begin(), E = ChainedNode->use_end(); UI != E; ++UI) { // Make sure the use is of the chain, not some other value we produce. if (UI.getUse().getValueType() != MVT::Other) continue; - + SDNode *User = *UI; // If we see an already-selected machine node, then we've gone beyond the @@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode, if (User->isMachineOpcode() || User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - + if (User->getOpcode() == ISD::CopyToReg || User->getOpcode() == ISD::CopyFromReg || User->getOpcode() == ISD::INLINEASM || @@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode, if (!std::count(ChainedNodesInPattern.begin(), ChainedNodesInPattern.end(), User)) return CR_InducesCycle; - + // Otherwise we found a node that is part of our pattern. For example in: // x = load ptr // y = x+4 @@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode, InteriorChainedNodes.push_back(User); continue; } - + // If we found a TokenFactor, there are two cases to consider: first if the // TokenFactor is just hanging "below" the pattern we're matching (i.e. no // uses of the TF are in our pattern) we just want to ignore it. Second, @@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode, case CR_LeadsToInteriorNode: break; // Otherwise, keep processing. } - + // Okay, we know we're in the interesting interior case. The TokenFactor // is now going to be considered part of the pattern so that we rewrite its // uses (it may have uses that are not part of the pattern) with the @@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode, InteriorChainedNodes.push_back(User); continue; } - + return Result; } @@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, InteriorChainedNodes) == CR_InducesCycle) return SDValue(); // Would induce a cycle. 
} - + // Okay, we have walked all the matched nodes and collected TokenFactor nodes // that we are interested in. Form our input TokenFactor node. SmallVector<SDValue, 3> InputChains; @@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, if (N->getOpcode() != ISD::TokenFactor) { if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) continue; - + // Otherwise, add the input chain. SDValue InChain = ChainNodesMatched[i]->getOperand(0); assert(InChain.getValueType() == MVT::Other && "Not a chain"); InputChains.push_back(InChain); continue; } - + // If we have a token factor, we want to add all inputs of the token factor // that are not part of the pattern we're matching. for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { @@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, InputChains.push_back(N->getOperand(op)); } } - + SDValue Res; if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), MVT::Other, &InputChains[0], InputChains.size()); -} +} /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: @@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be - // adding a chain) and the input could have flags and chains as well. + // adding a chain) and the input could have glue and chains as well. // In this case we need to shift the operands down. // FIXME: This is a horrible hack and broken in obscure cases, no worse // than the old isel though. - int OldFlagResultNo = -1, OldChainResultNo = -1; + int OldGlueResultNo = -1, OldChainResultNo = -1; unsigned NTMNumResults = Node->getNumValues(); - if (Node->getValueType(NTMNumResults-1) == MVT::Flag) { - OldFlagResultNo = NTMNumResults-1; + if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { + OldGlueResultNo = NTMNumResults-1; if (NTMNumResults != 1 && Node->getValueType(NTMNumResults-2) == MVT::Other) OldChainResultNo = NTMNumResults-2; @@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } unsigned ResNumResults = Res->getNumValues(); - // Move the flag if needed. - if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 && - (unsigned)OldFlagResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), + // Move the glue if needed. + if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && + (unsigned)OldGlueResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), SDValue(Res, ResNumResults-1)); - if ((EmitNodeInfo & OPFL_FlagOutput) != 0) + if ((EmitNodeInfo & OPFL_GlueOutput) != 0) --ResNumResults; // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && (unsigned)OldChainResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), SDValue(Res, ResNumResults-1)); // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. if (Res != Node) CurDAG->ReplaceAllUsesWith(Node, Res); - + return Res; } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
-ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) { + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { // Accept if it is exactly the same as a previously recorded node. unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - return N == RecordedNodes[RecNo]; + return N == RecordedNodes[RecNo].first; } - + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; - + // Handle the case when VT is iPTR. return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI, unsigned ChildNo) { @@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; - + // Handle the case when VT is iPTR. 
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); return C != 0 && C->getSExtValue() == Val; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + if (N->getOpcode() != ISD::AND) return false; - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + if (N->getOpcode() != ISD::OR) return false; - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); } @@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// fail, set Result=true and return anything. If the current predicate is /// known to pass, set Result=false and return the MatcherIndex to continue /// with. If the current predicate is unknown, set Result=false and return the -/// MatcherIndex to continue with. +/// MatcherIndex to continue with. static unsigned IsPredicateKnownToFail(const unsigned char *Table, unsigned Index, SDValue N, bool &Result, SelectionDAGISel &SDISel, - SmallVectorImpl<SDValue> &RecordedNodes){ + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { switch (Table[Index++]) { default: Result = false; @@ -1782,21 +1980,21 @@ namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. unsigned FailIndex; - + /// NodeStack - The node stack when the scope was formed. SmallVector<SDValue, 4> NodeStack; - + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. unsigned NumRecordedNodes; - + /// NumMatchedMemRefs - The number of matched memref entries. unsigned NumMatchedMemRefs; - - /// InputChain/InputFlag - The current chain/flag - SDValue InputChain, InputFlag; + + /// InputChain/InputGlue - The current chain/glue + SDValue InputChain, InputGlue; /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. - bool HasChainNodesMatched, HasFlagResultNodesMatched; + bool HasChainNodesMatched, HasGlueResultNodesMatched; }; } @@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } - + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); // Set up the node stack with NodeToMatch as the only node on the stack. @@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // MatchScopes - Scopes used when matching, if a match failure happens, this // indicates where to continue checking. 
SmallVector<MatchScope, 8> MatchScopes; - + // RecordedNodes - This is the set of nodes that have been recorded by the - // state machine. - SmallVector<SDValue, 8> RecordedNodes; - + // state machine. The second value is the parent of the node, or null if the + // root is recorded. + SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; + // MatchedMemRefs - This is the set of MemRef's we've seen in the input // pattern. SmallVector<MachineMemOperand*, 2> MatchedMemRefs; - - // These are the current input chain and flag for use when generating nodes. + + // These are the current input chain and glue for use when generating nodes. // Various Emit operations change these. For example, emitting a copytoreg // uses and updates these. - SDValue InputChain, InputFlag; - + SDValue InputChain, InputGlue; + // ChainNodesMatched - If a pattern matches nodes that have input/output // chains, the OPC_EmitMergeInputChains operation is emitted which indicates // which ones they are. The result is captured into this list so that we can // update the chain results when the pattern is complete. SmallVector<SDNode*, 3> ChainNodesMatched; - SmallVector<SDNode*, 3> FlagResultNodesMatched; - + SmallVector<SDNode*, 3> GlueResultNodesMatched; + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); errs() << '\n'); - + // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we // accelerate the first lookup (which is guaranteed to be hot) with the // OpcodeOffset table. unsigned MatcherIndex = 0; - + if (!OpcodeOffset.empty()) { // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) @@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; } - + while (1) { assert(MatcherIndex < TableSize && "Invalid index"); #ifndef NDEBUG @@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // determine immediately that the first check (or first several) will // immediately fail, don't even bother pushing a scope for them. unsigned FailIndex; - + while (1) { unsigned NumToSkip = MatcherTable[MatcherIndex++]; if (NumToSkip & 128) @@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FailIndex = 0; break; } - + FailIndex = MatcherIndex+NumToSkip; - + unsigned MatcherIndexOfPredicate = MatcherIndex; (void)MatcherIndexOfPredicate; // silence warning. - + // If we can't evaluate this predicate without pushing a scope (e.g. if // it is a 'MoveParent') or if the predicate succeeds on this node, we // push the scope and evaluate the full predicate chain. @@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, Result, *this, RecordedNodes); if (!Result) break; - + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; - + // Otherwise, we know that this case of the Scope is guaranteed to fail, // move to the next case. MatcherIndex = FailIndex; } - + // If the whole scope failed to match, bail. if (FailIndex == 0) break; - + // Push a MatchScope which indicates where to go if the first child fails // to match. 
MatchScope NewEntry; @@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NewEntry.NumRecordedNodes = RecordedNodes.size(); NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); NewEntry.InputChain = InputChain; - NewEntry.InputFlag = InputFlag; + NewEntry.InputGlue = InputGlue; NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); - NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); + NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); MatchScopes.push_back(NewEntry); continue; } - case OPC_RecordNode: + case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. - RecordedNodes.push_back(N); + SDNode *Parent = 0; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); continue; - + } + case OPC_RecordChild0: case OPC_RecordChild1: case OPC_RecordChild2: case OPC_RecordChild3: case OPC_RecordChild4: case OPC_RecordChild5: @@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ChildNo >= N.getNumOperands()) break; // Match fails if out of range child #. - RecordedNodes.push_back(N->getOperand(ChildNo)); + RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), + N.getNode())); continue; } case OPC_RecordMemRef: MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); continue; - - case OPC_CaptureFlagInput: - // If the current node has an input flag, capture it in InputFlag. + + case OPC_CaptureGlueInput: + // If the current node has an input glue, capture it in InputGlue. if (N->getNumOperands() != 0 && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) - InputFlag = N->getOperand(N->getNumOperands()-1); + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) + InputGlue = N->getOperand(N->getNumOperands()-1); continue; - + case OPC_MoveChild: { unsigned ChildNo = MatcherTable[MatcherIndex++]; if (ChildNo >= N.getNumOperands()) @@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NodeStack.push_back(N); continue; } - + case OPC_MoveParent: // Pop the current node off the NodeStack. NodeStack.pop_back(); assert(!NodeStack.empty() && "Node stack imbalance!"); - N = NodeStack.back(); + N = NodeStack.back(); continue; - + case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; @@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); - if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, + RecordedNodes[RecNo].first, CPNum, RecordedNodes)) break; continue; @@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckOpcode: if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; continue; - + case OPC_CheckType: if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; continue; - + case OPC_SwitchOpcode: { unsigned CurNodeOpcode = N.getOpcode(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; @@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the opcode matches, then we will execute this case. 
if (CurNodeOpcode == Opc) break; - + // Otherwise, skip over this case. MatcherIndex += CaseSize; } - + // If no cases matched, bail out. if (CaseSize == 0) break; - + // Otherwise, execute the case we found. DEBUG(errs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } - + case OPC_SwitchType: { - MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; + MVT CurNodeVT = N.getValueType().getSimpleVT(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize & 128) CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; - - MVT::SimpleValueType CaseVT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + + MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI.getPointerTy().SimpleTy; - + CaseVT = TLI.getPointerTy(); + // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) break; - + // Otherwise, skip over this case. MatcherIndex += CaseSize; } - + // If no cases matched, bail out. if (CaseSize == 0) break; - + // Otherwise, execute the case we found. DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); @@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckOrImm: if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; - + case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have @@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NodeToMatch, OptLevel, true/*We validate our own chains*/)) break; - + continue; } case OPC_EmitInteger: { @@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); continue; } case OPC_EmitRegister: { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; - RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); continue; } - + case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. 
unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - SDValue Imm = RecordedNodes[RecNo]; + SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); @@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); } - - RecordedNodes.push_back(Imm); + + RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); continue; } - + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 // These are space-optimized forms of OPC_EmitMergeInputChains. @@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); - + // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); - + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && - !RecordedNodes[RecNo].hasOneUse()) { + !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } - + // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - + if (InputChain.getNode() == 0) break; // Failed to merge. continue; } - + case OPC_EmitMergeInputChains: { assert(InputChain.getNode() == 0 && "EmitMergeInputChains should be the first chain producing node"); @@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); - + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && - !RecordedNodes[RecNo].hasOneUse()) { + !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } } - + // If the inner loop broke out, the match fails. if (ChainNodesMatched.empty()) break; // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - + if (InputChain.getNode() == 0) break; // Failed to merge. 
continue; } - + case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; - + if (InputChain.getNode() == 0) InputChain = CurDAG->getEntryNode(); - + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), - DestPhysReg, RecordedNodes[RecNo], - InputFlag); - - InputFlag = InputChain.getValue(1); + DestPhysReg, RecordedNodes[RecNo].first, + InputGlue); + + InputGlue = InputChain.getValue(1); continue; } - + case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); + SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; } - + case OPC_EmitNode: case OPC_MorphNodeTo: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; @@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; VTs.push_back(VT); } - + if (EmitNodeInfo & OPFL_Chain) VTs.push_back(MVT::Other); - if (EmitNodeInfo & OPFL_FlagOutput) - VTs.push_back(MVT::Flag); - + if (EmitNodeInfo & OPFL_GlueOutput) + VTs.push_back(MVT::Glue); + // This is hot code, so optimize the two most common cases of 1 and 2 // results. SDVTList VTList; @@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); - Ops.push_back(RecordedNodes[RecNo]); + Ops.push_back(RecordedNodes[RecNo].first); } - + // If there are variadic operands to add, handle them now. if (EmitNodeInfo & OPFL_VariadicInfo) { // Determine the start index to copy from. @@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && "Invalid variadic node"); - // Copy all of the variadic operands, not including a potential flag + // Copy all of the variadic operands, not including a potential glue // input. for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); i != e; ++i) { SDValue V = NodeToMatch->getOperand(i); - if (V.getValueType() == MVT::Flag) break; + if (V.getValueType() == MVT::Glue) break; Ops.push_back(V); } } - - // If this has chain/flag inputs, add them. + + // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); - if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0) - Ops.push_back(InputFlag); - + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + Ops.push_back(InputGlue); + // Create the node. SDNode *Res = 0; if (Opcode != OPC_MorphNodeTo) { @@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), VTList, Ops.data(), Ops.size()); - - // Add all the non-flag/non-chain results to the RecordedNodes list. + + // Add all the non-glue/non-chain results to the RecordedNodes list. 
for (unsigned i = 0, e = VTs.size(); i != e; ++i) { - if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break; - RecordedNodes.push_back(SDValue(Res, i)); + if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), + (SDNode*) 0)); } - + } else { Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), EmitNodeInfo); } - - // If the node had chain/flag results, update our notion of the current - // chain and flag. - if (EmitNodeInfo & OPFL_FlagOutput) { - InputFlag = SDValue(Res, VTs.size()-1); + + // If the node had chain/glue results, update our notion of the current + // chain and glue. + if (EmitNodeInfo & OPFL_GlueOutput) { + InputGlue = SDValue(Res, VTs.size()-1); if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-2); } else if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-1); - // If the OPFL_MemRefs flag is set on this node, slap all of the + // If the OPFL_MemRefs glue is set on this node, slap all of the // accumulated memrefs onto it. // // FIXME: This is vastly incorrect for patterns with multiple outputs @@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, cast<MachineSDNode>(Res) ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); } - + DEBUG(errs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") << " node: "; Res->dump(CurDAG); errs() << "\n"); - + // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { - // Update chain and flag uses. - UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, - InputFlag, FlagResultNodesMatched, true); + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, true); return Res; } - + continue; } - - case OPC_MarkFlagResults: { + + case OPC_MarkGlueResults: { unsigned NumNodes = MatcherTable[MatcherIndex++]; - - // Read and remember all the flag-result nodes. + + // Read and remember all the glue-result nodes. for (unsigned i = 0; i != NumNodes; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; } - + case OPC_CompleteMatch: { // The match has been completed, and any new nodes (if any) have been // created. 
Patch up references to the matched dag to use the newly @@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned ResSlot = MatcherTable[MatcherIndex++]; if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); - SDValue Res = RecordedNodes[ResSlot]; - + SDValue Res = RecordedNodes[ResSlot].first; + assert(i < NodeToMatch->getNumValues() && NodeToMatch->getValueType(i) != MVT::Other && - NodeToMatch->getValueType(i) != MVT::Flag && + NodeToMatch->getValueType(i) != MVT::Glue && "Invalid number of results to complete!"); assert((NodeToMatch->getValueType(i) == Res.getValueType() || NodeToMatch->getValueType(i) == MVT::iPTR || @@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } - // If the root node defines a flag, add it to the flag nodes to update - // list. - if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) - FlagResultNodesMatched.push_back(NodeToMatch); - - // Update chain and flag uses. - UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, - InputFlag, FlagResultNodesMatched, false); - + // If the root node defines glue, add it to the glue nodes to update list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) + GlueResultNodesMatched.push_back(NodeToMatch); + + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, false); + assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); - + // FIXME: We just return here, which interacts correctly with SelectRoot // above. We should fix this to not return an SDNode* anymore. return 0; } } - + // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. @@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - + DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); - + InputChain = LastScope.InputChain; - InputFlag = LastScope.InputFlag; + InputGlue = LastScope.InputGlue; if (!LastScope.HasChainNodesMatched) ChainNodesMatched.clear(); - if (!LastScope.HasFlagResultNodesMatched) - FlagResultNodesMatched.clear(); + if (!LastScope.HasGlueResultNodesMatched) + GlueResultNodesMatched.clear(); // Check to see what the offset is at the new MatcherIndex. If it is zero // we have reached the end of this scope, otherwise we have another child @@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, LastScope.FailIndex = MatcherIndex+NumToSkip; break; } - + // End of this scope, pop it and try the next child in the containing // scope. 
MatchScopes.pop_back(); } } } - + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); - Msg << "Cannot yet select: "; - + Msg << "Cannot select: "; + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 8313de5e32bb..76eb9453561e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -93,7 +93,7 @@ namespace llvm { static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); EVT VT = Op.getValueType(); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return "color=red,style=bold"; else if (VT == MVT::Other) return "color=blue,style=dashed"; @@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { raw_string_ostream O(s); O << "SU(" << SU->NodeNum << "): "; if (SU->getNode()) { - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { O << DOTGraphTraits<SelectionDAG*> - ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); - FlaggedNodes.pop_back(); - if (!FlaggedNodes.empty()) + ::getSimpleNodeLabel(GluedNodes.back(), DAG); + GluedNodes.pop_back(); + if (!GluedNodes.empty()) O << "\n "; } } else { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b74f600cfa2d..691390e2a0e4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <cctype> using namespace llvm; namespace llvm { @@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); } - + // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Most targets ignore the @llvm.prefetch intrinsic. setOperationAction(ISD::PREFETCH, MVT::Other, Expand); - - // ConstantFP nodes default to expand. Targets can either change this to + + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. setOperationAction(ISD::ConstantFP, MVT::f32, Expand); @@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand); - + IsLittleEndian = TD->isLittleEndian(); ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize + = maxStoresPerMemmoveOptSize = 4; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; IntDivIsCheap = false; Pow2DivIsCheap = false; + JumpIsExpensive = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); - + unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } - + // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { @@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, } NumIntermediates = NumVectorRegs; - + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); if (!TLI->isTypeLegal(NewVT)) NewVT = EltTy; @@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - + // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; @@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() { RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); - } + } // Decide how to handle f64. If the target does not have native f64 support, // expand it to i64 and we will be generating soft float library calls. @@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() { ValueTypeActions.setTypeAction(MVT::f32, Expand); } } - + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (isTypeLegal(VT)) continue; - + // Determine if there is a legal wider type. If so, we should promote to // that wider vector type. 
EVT EltVT = VT.getVectorElementType(); @@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() { for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeSynthesizable(SVT)) { + SVT.getVectorNumElements() > NElts && + isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() { } if (IsLegalWiderType) continue; } - + MVT IntermediateVT; EVT RegisterVT; unsigned NumIntermediates; @@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() { getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; - + EVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. @@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, unsigned &NumIntermediates, EVT &RegisterVT) const { unsigned NumElts = VT.getVectorNumElements(); - + // If there is a wider vector type with the same element type as this one, // we should widen to that legal vector type. This handles things like // <2 x float> -> <4 x float>. @@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } } - + // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); - + unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } - + // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !isTypeLegal( @@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, } NumIntermediates = NumVectorRegs; - + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; @@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, RegisterVT = DestVT; if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - + // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; } -/// Get the EVTs and ArgFlags collections that represent the legalized return +/// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. @@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return MachineJumpTableInfo::EK_BlockAddress; - + // In PIC mode, if the target supports a GPRel32 directive, use it. if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) return MachineJumpTableInfo::EK_GPRel32BlockAddress; - + // Otherwise, use a label difference. 
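The getJumpTableEncoding() hunk above picks an entry kind from the relocation model and the assembler's GPRel32 support. A small sketch of that decision, using hypothetical enums rather than the MachineJumpTableInfo constants:

#include <iostream>

enum class Reloc { Static, PIC };
enum class JTEncoding { BlockAddress, GPRel32BlockAddress, LabelDifference32 };

// Sketch of the default jump-table encoding choice shown above.
JTEncoding chooseJumpTableEncoding(Reloc Model, bool HasGPRel32Directive) {
  if (Model != Reloc::PIC)
    return JTEncoding::BlockAddress;        // absolute addresses are fine
  if (HasGPRel32Directive)
    return JTEncoding::GPRel32BlockAddress; // GP-relative 32-bit entries
  return JTEncoding::LabelDifference32;     // otherwise label differences
}

int main() {
  std::cout << int(chooseJumpTableEncoding(Reloc::PIC, false)) << '\n';   // 2
}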
return MachineJumpTableInfo::EK_LabelDifference32; } @@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { DebugLoc dl = Op.getDebugLoc(); @@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, EVT VT = Op.getValueType(); SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & - C->getAPIntValue(), + C->getAPIntValue(), VT)); return CombineTo(Op, New); } @@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero = KnownOne = APInt(BitWidth, 0); // Other users may use these bits. - if (!Op.getNode()->hasOneUse()) { + if (!Op.getNode()->hasOneUse()) { if (Depth != 0) { - // If not at the root, Just compute the KnownZero/KnownOne bits to + // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); return false; @@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is the root being simplified, allow it to have multiple uses, // just set the NewMask to all bits. NewMask = APInt::getAllOnesValue(BitWidth); - } else if (DemandedMask == 0) { + } else if (DemandedMask == 0) { // Not demanding any bits from Op. if (Op.getOpcode() != ISD::UNDEF) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); @@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the RHS. if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; + // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth+1); + LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) return true; } - + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
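The ISD::AND case above drops the mask entirely when the known-zero bits of the other operand line up with the bits the constant clears, restricted to the demanded bits. A tiny standalone rendering of that check (KnownBits here is a hypothetical two-field struct, not LLVM's APInt machinery):

#include <cassert>
#include <cstdint>

// Known facts about a value: bits proven zero and bits proven one.
struct KnownBits { uint32_t Zero = 0, One = 0; };

// Mirrors the check in the hunk above: 'X & C' is redundant when, over the
// demanded bits, X's known-zero bits match the bits that C clears.
bool andIsRedundant(KnownBits X, uint32_t C, uint32_t Demanded) {
  return (X.Zero & Demanded) == (~C & Demanded);
}

int main() {
  KnownBits X;
  X.Zero = 0xFFFF0000;              // X proven to have a clear high half
  // Masking with 0x0000FFFF is a no-op when only the low half is demanded.
  assert(andIsRedundant(X, 0x0000FFFF, 0x0000FFFF));
  // Not redundant if the constant also clears demanded low bits.
  assert(!andIsRedundant(X, 0x000000FF, 0x0000FFFF));
}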
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) @@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero |= KnownZero2; break; case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) @@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne |= KnownOne2; break; case ISD::XOR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if ((KnownZero & NewMask) == NewMask) @@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); - + // Output known-0 bits are known if clear or set in both the LHS & RHS. KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - + // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. @@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownOne & KnownOne2) == KnownOne) { EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } } - + // If the RHS is a constant, see if we can simplify it. // for XOR, we prefer to force bits to 1 if they will make a -1. 
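The OR and XOR cases above combine the operands' known bits with the usual rules: for OR, ones union and zeros intersect; for AND, the reverse; for XOR, a bit is known when both inputs are known. A self-contained sketch of those combination rules, using a small hypothetical KnownBits struct:

#include <cassert>
#include <cstdint>

struct KnownBits { uint32_t Zero, One; };

KnownBits knownAnd(KnownBits A, KnownBits B) {
  return {A.Zero | B.Zero, A.One & B.One};
}
KnownBits knownOr(KnownBits A, KnownBits B) {
  return {A.Zero & B.Zero, A.One | B.One};
}
KnownBits knownXor(KnownBits A, KnownBits B) {
  // Known zero where both inputs agree (both zero or both one); known one
  // where exactly one input is known one and the other known zero.
  return {(A.Zero & B.Zero) | (A.One & B.One),
          (A.Zero & B.One) | (A.One & B.Zero)};
}

int main() {
  KnownBits A{0xFFFF0000u, 0x00000001u};   // high half zero, bit 0 one
  KnownBits B{0x000000FFu, 0x00000100u};   // low byte zero, bit 8 one
  KnownBits X = knownXor(A, B);
  assert((X.Zero & X.One) == 0);           // a bit is never both zero and one
  assert(X.One & 0x00000001);              // bit 0: one ^ zero = one
}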
// if we can't force bits, try to shrink constant @@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne = KnownOneOut; break; case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; @@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Diff < 0) { Diff = -Diff; Opc = ISD::SRL; - } - - SDValue NewSA = + } + + SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } - } - + } + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; @@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, unsigned ShAmt = SA->getZExtValue(); unsigned VTSize = VT.getSizeInBits(); SDValue InOp = Op.getOperand(0); - + // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) break; @@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Diff < 0) { Diff = -Diff; Opc = ISD::SHL; - } - + } + SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } - } - + } + // Compute the new bits that are at the top now. if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); - + // If the shift count is an invalid immediate, don't do anything. 
if (ShAmt >= BitWidth) break; @@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); - + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - + // Handle the sign bit, adjusted to where it is now in the mask. APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); - + // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), Op.getOperand(1))); } else if (KnownOne.intersects(SignBit)) { // New bits are known one. @@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - // Sign extension. Compute the demanded bits in the result that are not + // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EVT.getScalarType().getSizeInBits()); - + // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); - InSignBit.zext(BitWidth); + APInt InSignBit = + APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EVT.getScalarType().getSizeInBits()) & NewMask; - + // Since the sign extended bits are demanded, we know that the sign // bit is demanded. InputDemandedBits |= InSignBit; @@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - + // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) - return TLO.CombineTo(Op, + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); - + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; KnownZero &= ~NewBits; @@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt InMask = NewMask; - InMask.trunc(OperandBitWidth); - + APInt InMask = NewMask.trunc(OperandBitWidth); + // If none of the top bits are demanded, convert this into an any_extend. 
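The SRA case above rewrites an arithmetic shift into a logical one once the sign bit is known zero (or no high bits are demanded), since the two agree on non-negative inputs and the unsigned form is easier for later combines. A brute-force standalone check of that equivalence:

#include <cassert>
#include <cstdint>

// Once the sign bit is known to be zero, an arithmetic right shift produces
// exactly the same bits as a logical right shift.
int main() {
  for (uint32_t x = 0; x < (1u << 16); ++x) {   // sign bit certainly clear
    for (unsigned sh = 0; sh < 32; ++sh) {
      uint32_t logical = x >> sh;
      uint32_t arith   = uint32_t(int32_t(x) >> sh);
      assert(logical == arith);
    }
  }
  return 0;
}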
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; if (!NewBits.intersects(NewMask)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + Op.getValueType(), Op.getOperand(0))); - + if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; break; } @@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; - + // If none of the top bits are demanded, convert this into an any_extend. if (NewBits == 0) return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); - + // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. APInt InDemandedBits = InMask & NewMask; InDemandedBits |= InSignBit; - InDemandedBits.trunc(InBits); - - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + InDemandedBits = InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, KnownOne, TLO, Depth+1)) return true; - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); - + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + // If the sign bit is known zero, convert this to a zero extend. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, - Op.getValueType(), + Op.getValueType(), Op.getOperand(0))); - + // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { KnownOne |= NewBits; @@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::ANY_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt InMask = NewMask; - InMask.trunc(OperandBitWidth); + APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // zero/one bits live out. unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt TruncMask = NewMask; - TruncMask.zext(OperandBitWidth); + APInt TruncMask = NewMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); - + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
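Two things happen in the TRUNCATE case above: the demanded mask is zero-extended to the source width before recursing, and the resulting known bits are truncated back; separately, the patch switches APInt::zext/trunc from mutating in place to returning the resized value, hence the new assignments. A small, admittedly contrived sketch of the mask-widening and result-narrowing step using plain integers:

#include <cassert>
#include <cstdint>

struct Known64 { uint64_t Zero = 0, One = 0; };

int main() {
  uint32_t demanded32 = 0x0000FF00;            // caller cares about bits 8..15
  uint64_t demanded64 = uint64_t(demanded32);  // "zext" the mask to 64 bits

  // Suppose analysis of the wide value proved bits 8..11 one and 12..15 zero.
  Known64 wide;
  wide.One  = 0x0000000000000F00ull & demanded64;
  wide.Zero = 0x000000000000F000ull & demanded64;

  // "trunc" the known bits back to the narrow width.
  uint32_t KnownOne32  = uint32_t(wide.One);
  uint32_t KnownZero32 = uint32_t(wide.Zero);
  assert((KnownOne32 & KnownZero32) == 0);
  assert(KnownOne32 == 0x0F00 && KnownZero32 == 0xF000);
}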
if (Op.getOperand(0).getNode()->hasOneUse()) { @@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), + Op.getValueType(), In.getOperand(0)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - NewTrunc, + NewTrunc, In.getOperand(1))); } break; } } - - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, @@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero |= ~InMask & NewMask; break; } - case ISD::BIT_CONVERT: + case ISD::BITCAST: #if 0 // If this is an FP->Int bitcast and if the sign bit is the only thing that // is demanded, turn this into a FGETSIGN. @@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), Op.getOperand(0)); unsigned ShVal = Op.getValueType().getSizeInBits()-1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); @@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); break; } - + // If we know the value of all of the demanded bits, return this as a // constant. if ((NewMask & (KnownZero|KnownOne)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); - + return false; } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. -void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { @@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { (KnownOne.countPopulation() == 1); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. 
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || N0.getValueType().getSizeInBits() > + Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, + DAG.getConstant(1, CTVT)); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + } + + // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. @@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!Lod->isVolatile() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueType().getSizeInBits(); unsigned maskWidth = origWidth; - // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); @@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(bestOffset, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getSrcValue(), - Lod->getSrcValueOffset() + bestOffset, + Lod->getPointerInfo().getWithOffset(bestOffset), false, false, NewAlign); - return DAG.getSetCC(dl, VT, + return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), newVT)), @@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT)==Legal)) return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(APInt(C1).trunc(InSize), newVT), + DAG.getConstant(C1.trunc(InSize), newVT), Cond); break; } @@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) return DAG.getConstant(Cond == ISD::SETNE, VT); - + SDValue ZextOp; EVT Op0Ty = N0.getOperand(0).getValueType(); if (Op0Ty == ExtSrcTy) { @@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); // Otherwise, make this a use of a zext. 
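The new ctpop rewrite above relies on the classic identity that x & (x - 1) clears the lowest set bit, so it is zero exactly when x has at most one bit set. A standalone brute-force check of the two directions used in the hunk:

#include <cassert>
#include <cstdint>

//   (ctpop x) u< 2  <=>  (x & (x-1)) == 0
//   (ctpop x) u> 1  <=>  (x & (x-1)) != 0
static unsigned popcount(uint32_t x) {
  unsigned n = 0;
  for (int i = 0; i < 32; ++i)
    n += (x >> i) & 1;
  return n;
}

int main() {
  for (uint64_t v = 0; v <= 0xFFFF; ++v) {
    uint32_t x = uint32_t(v);
    bool lhs = popcount(x) < 2;
    bool rhs = (x & (x - 1)) == 0;   // also covers x == 0, since 0 & ~0 == 0
    assert(lhs == rhs);
  }
  return 0;
}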
- return DAG.getSetCC(dl, VT, ZextOp, + return DAG.getSetCC(dl, VT, ZextOp, DAG.getConstant(C1 & APInt::getLowBitsSet( ExtDstTyBits, - ExtSrcTyBits), + ExtSrcTyBits), ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && @@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) - return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && + (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && @@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOpcode() == ISD::XOR) Val = N0.getOperand(0); else { - assert(N0.getOpcode() == ISD::AND && + assert(N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR); // ((X^1)&1)^1 -> X & 1 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), @@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } - + APInt MinVal, MaxVal; unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { @@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C1-1, N1.getValueType()), (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); } @@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C1+1, N1.getValueType()), (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); } @@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If we have setult X, 1, turn it into seteq X, 0 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(MaxVal, N0.getValueType()), ISD::SETEQ); @@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // by changing cc. 
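Several of the canonicalizations above trade a non-strict comparison for a strict one against an adjusted constant, or collapse comparisons against the extreme values. A quick standalone check of two of them over all unsigned 8-bit values:

#include <cassert>
#include <cstdint>

//   X >= C   is   X > C-1    (valid whenever C is not the minimum value)
//   X u< 1   is   X == 0
int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned c = 1; c < 256; ++c)     // c != MIN, so c-1 does not wrap
      assert((x >= c) == (x > c - 1));
    assert((x < 1u) == (x == 0));
  }
  return 0;
}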
// SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && + if (Cond == ISD::SETUGT && C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(0, N1.getValueType()), ISD::SETLT); @@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getUNDEF(VT); } } - + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the // constant if knowing that the operand is non-nan is enough. We prefer to // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to @@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DAG.isCommutativeBinOp(N0.getOpcode())) { // If X op Y == Y op X, try other combinations. if (N0.getOperand(0) == N1.getOperand(1)) - return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), Cond); if (N0.getOperand(1) == N1.getOperand(0)) - return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), Cond); } } - + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LHSR->getAPIntValue(), N0.getValueType()), Cond); } - + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. if (N0.getOpcode() == ISD::XOR) // If we know that all of the inverted bits are zero, don't bother @@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getValueType()), Cond); } - + // Turn (C1-X) == C2 --> X == C1-C2 if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { @@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getValueType()), Cond); } - } + } } // Simplify (X+Z) == X --> Z == 0 @@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + N1, DAG.getConstant(1, getShiftAmountTy())); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); @@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, DAG.getConstant(1, getShiftAmountTy())); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); @@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. 
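Two more rewrites from the hunk above are easy to check in isolation: an unsigned compare against the signed maximum is just a sign test, and adding a constant on the left can be folded into the right-hand constant because the arithmetic is modular. A small check, assuming the usual two's-complement conversion behaviour:

#include <cassert>
#include <cstdint>

//   X u> 0x7fffffff   <=>   (signed)X < 0
//   (X + C1) == C2    <=>   X == C2 - C1       (mod 2^32)
int main() {
  uint32_t samples[] = {0u, 1u, 0x7fffffffu, 0x80000000u,
                        0xfffffffeu, 0xffffffffu};
  const uint32_t C1 = 0x12345678u, C2 = 0x9abcdef0u;
  for (uint32_t x : samples) {
    assert((x > 0x7fffffffu) == (int32_t(x) < 0));
    assert((uint32_t(x + C1) == C2) == (x == uint32_t(C2 - C1)));
  }
  return 0;
}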
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { if (isa<GlobalAddressSDNode>(N)) { GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); @@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, } } } + return false; } @@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { return C_Memory; case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer + case 'E': // Floating Point Constant + case 'F': // Floating Point Constant case 's': // Relocatable Constant + case 'p': // Address. case 'X': // Allow ANY value. case 'I': // Target registers. case 'J': @@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { case 'N': case 'O': case 'P': + case '<': + case '>': return C_Other; } } - - if (Constraint.size() > 1 && Constraint[0] == '{' && + + if (Constraint.size() > 1 && Constraint[0] == '{' && Constraint[Constraint.size()-1] == '}') return C_Register; return C_Unknown; @@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // is possible and fine if either GV or C are missing. ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - + // If we have "(add GV, C)", pull out GV/C if (Op.getOpcode() == ISD::ADD) { C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); @@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (C == 0 || GA == 0) C = 0, GA = 0; } - + // If we find a valid operand, map to the TargetXXX version so that the // value itself doesn't get selected. if (GA) { // Either &GV or &GV+C if (ConstraintLetter != 'n') { int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), C ? C->getDebugLoc() : DebugLoc(), Op.getValueType(), Offs)); return; @@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; - - // If none of the value types for this register class are valid, we + + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. bool isLegal = false; for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint, break; } } - + if (!isLegal) continue; - - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { if (RegName.equals_lower(RI->getName(*I))) return std::make_pair(*I, RC); } } - + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); } @@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { } +/// ParseConstraints - Split up the constraint string from the inline +/// assembly value into the specific constraints and their prefixes, +/// and also tie in the associated operand values. +/// If this returns an empty vector, and if the constraint string itself +/// isn't empty, there was an error parsing. 
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( + ImmutableCallSite CS) const { + /// ConstraintOperands - Information about all of the constraints. + AsmOperandInfoVector ConstraintOperands; + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + unsigned maCount = 0; // Largest number of multiple alternative constraints. + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + InlineAsm::ConstraintInfoVector + ConstraintInfos = IA->ParseConstraints(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Update multiple alternative constraint count. + if (OpInfo.multipleAlternatives.size() > maCount) + maCount = OpInfo.multipleAlternatives.size(); + + OpInfo.ConstraintVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = getValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + if (OpInfo.CallOperandVal) { + const llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + if (OpInfo.isIndirect) { + const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpInfo.ConstraintVT = + EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); + break; + } + } else if (dyn_cast<PointerType>(OpTy)) { + OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize()); + } else { + OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); + } + } + } + + // If we have multiple alternative constraints, select the best alternative. + if (ConstraintInfos.size()) { + if (maCount) { + unsigned bestMAIndex = 0; + int bestWeight = -1; + // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. + int weight = -1; + unsigned maIndex; + // Compute the sums of the weights for each alternative, keeping track + // of the best (highest weight) one so far. 
+ for (maIndex = 0; maIndex < maCount; ++maIndex) { + int weightSum = 0; + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + if (OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is an output operand with a matching input operand, + // look up the matching input. If their types mismatch, e.g. one + // is an integer, the other is floating point, or their sizes are + // different, flag it as an maCantMatch. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + weightSum = -1; // Can't match. + break; + } + } + } + weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); + if (weight == -1) { + weightSum = -1; + break; + } + weightSum += weight; + } + // Update best. + if (weightSum > bestWeight) { + bestWeight = weightSum; + bestMAIndex = maIndex; + } + } + + // Now select chosen alternative in each constraint. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; + if (cInfo.Type == InlineAsm::isClobber) + continue; + cInfo.selectAlternative(bestMAIndex); + } + } + } + + // Check and hook up tied operands, choose constraint code to use. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + } + + } + } + + return ConstraintOperands; +} + + /// getConstraintGenerality - Return an integer indicating how general CT /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { @@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { } } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const { + InlineAsm::ConstraintCodeVector *rCodes; + if (maIndex >= (int)info.multipleAlternatives.size()) + rCodes = &info.Codes; + else + rCodes = &info.multipleAlternatives[maIndex].Codes; + ConstraintWeight BestWeight = CW_Invalid; + + // Loop over the options, keeping track of the most general one. 
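The alternative-selection loop above sums per-operand weights for each multiple-alternative constraint and keeps the alternative with the largest sum, where a single unmatched operand disqualifies the whole alternative. A small standalone sketch of that selection (the weight table here is made up for illustration):

#include <cstdio>
#include <vector>

// Each inner vector holds one operand's weight per alternative; -1 means
// "cannot match". The chosen alternative is the one with the largest sum.
int pickBestAlternative(const std::vector<std::vector<int>> &Weights,
                        unsigned NumAlternatives) {
  int bestIndex = 0, bestSum = -1;
  for (unsigned a = 0; a < NumAlternatives; ++a) {
    int sum = 0;
    for (const auto &PerOperand : Weights) {
      int w = PerOperand[a];
      if (w == -1) { sum = -1; break; }   // this alternative can't match
      sum += w;
    }
    if (sum > bestSum) { bestSum = sum; bestIndex = int(a); }
  }
  return bestIndex;
}

int main() {
  // Two operands, three alternatives; the middle alternative wins (3 + 2).
  std::vector<std::vector<int>> w = {{1, 3, -1}, {1, 2, 5}};
  std::printf("best = %d\n", pickBestAlternative(w, 3));   // prints "best = 1"
}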
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { + ConstraintWeight weight = + getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); + if (weight > BestWeight) + BestWeight = weight; + } + + return BestWeight; +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + case 'i': // immediate integer. + case 'n': // immediate integer with a known value. + if (isa<ConstantInt>(CallOperandVal)) + weight = CW_Constant; + break; + case 's': // non-explicit intregal immediate. + if (isa<GlobalValue>(CallOperandVal)) + weight = CW_Constant; + break; + case 'E': // immediate float if host format. + case 'F': // immediate float. + if (isa<ConstantFP>(CallOperandVal)) + weight = CW_Constant; + break; + case '<': // memory operand with autodecrement. + case '>': // memory operand with autoincrement. + case 'm': // memory operand. + case 'o': // offsettable memory operand + case 'V': // non-offsettable memory operand + weight = CW_Memory; + break; + case 'r': // general register. + case 'g': // general register, memory operand or immediate integer. + // note: Clang converts "g" to "imr". + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + case 'X': // any operand. + default: + weight = CW_Default; + break; + } + return weight; +} + /// ChooseConstraint - If there are multiple different constraints that we /// could pick for this operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: @@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, break; } } - + // Things with matching constraints can only be registers, per gcc // documentation. This mainly affects "g" constraints. if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) continue; - + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, BestGenerality = Generality; } } - + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; OpInfo.ConstraintType = BestType; } @@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// type to use for the specific AsmOperandInfo, setting /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, - SDValue Op, + SDValue Op, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); - + // Single-letter constraints ('r') are very common. if (OpInfo.Codes.size() == 1) { OpInfo.ConstraintCode = OpInfo.Codes[0]; @@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } else { ChooseConstraint(OpInfo, *this, Op, DAG); } - + // 'X' matches anything. 
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { // Labels and constants are handled elsewhere ('X' is the only thing @@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, OpInfo.CallOperandVal = v; return; } - + // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { @@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, +bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. @@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, // Allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; - + // No global is ever allowed as a base. if (AM.BaseGV) return false; - - // Only support r+r, + + // Only support r+r, switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; @@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, // Allow 2*r as r+r. break; } - + return true; } @@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); - + // Check to see if we can do this. // FIXME: We should be more aggressive here. 
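The default isLegalAddressingMode() above accepts only a conservative r+r / r+i shape: a signed 16-bit offset, no global as the base, and a scale of 0, 1 or 2 (2*r being treated as r+r). A simplified standalone rendering of those checks (it omits the extra rejection of a scaled index combined with both a base register and an offset):

#include <cassert>
#include <cstdint>

struct AddrMode {
  int64_t BaseOffs;
  bool HasBaseGV;
  int64_t Scale;
};

bool defaultIsLegalAddressingMode(const AddrMode &AM) {
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;                        // offset must fit a signed 16-bit field
  if (AM.HasBaseGV)
    return false;                        // no global symbol as a base
  switch (AM.Scale) {
  case 0: case 1: case 2: return true;   // i, r+i, r+r, 2*r
  default: return false;
  }
}

int main() {
  assert(defaultIsLegalAddressingMode({100, false, 1}));
  assert(!defaultIsLegalAddressingMode({1 << 20, false, 1}));   // offset too big
  assert(!defaultIsLegalAddressingMode({0, false, 3}));         // scale 3 rejected
}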
if (!isTypeLegal(VT)) return SDValue(); - + APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); APInt::ms magics = d.magic(); - + // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; @@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, else return SDValue(); // No mulhs or equvialent // If d > 0 and m < 0, add the numerator - if (d.isStrictlyPositive() && magics.m.isNegative()) { + if (d.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); @@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, } // Shift right algebraic if shift value is nonzero if (magics.s > 0) { - Q = DAG.getNode(ISD::SRA, dl, VT, Q, + Q = DAG.getNode(ISD::SRA, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy())); if (Created) Created->push_back(Q.getNode()); @@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, if (magics.a == 0) { assert(magics.s < N1C->getAPIntValue().getBitWidth() && "We shouldn't generate an undefined shift!"); - return DAG.getNode(ISD::SRL, dl, VT, Q, + return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy())); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); if (Created) Created->push_back(NPQ.getNode()); - NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, + NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, getShiftAmountTy())); if (Created) Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); if (Created) Created->push_back(NPQ.getNode()); - return DAG.getNode(ISD::SRL, dl, VT, NPQ, + return DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(magics.s-1, getShiftAmountTy())); } } diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index aeaa38b56433..7b5bca495206 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -226,7 +226,7 @@ bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); CSRegSet prevAnticIn = AnticIn[MBB]; AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; - if (prevAnticIn |= AnticIn[MBB]) + if (prevAnticIn != AnticIn[MBB]) changed = true; return changed; } @@ -264,7 +264,7 @@ bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); CSRegSet prevAvailOut = AvailOut[MBB]; AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; - if (prevAvailOut |= AvailOut[MBB]) + if (prevAvailOut != AvailOut[MBB]) changed = true; return changed; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index b29ea19835bc..2843c1a5b6d8 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "regcoalescing" #include "SimpleRegisterCoalescing.h" #include "VirtRegMap.h" +#include "LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -64,9 +65,25 @@ DisablePhysicalJoin("disable-physical-join", cl::desc("Avoid coalescing physical register copies"), cl::init(false), cl::Hidden); -INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer, +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + 
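BuildSDIV above replaces a signed division by a constant with a high multiply by a precomputed magic number, optional add/subtract of the numerator, an arithmetic shift, and a final sign fixup. A standalone illustration for the specific divisor 7 at 32 bits (multiplier 0x92492493, shift 2, the standard Hacker's Delight constants); it assumes the usual two's-complement, arithmetic-shift behaviour of mainstream compilers:

#include <cassert>
#include <cstdint>

static int32_t mulhs(int32_t a, int32_t b) {
  return int32_t((int64_t(a) * int64_t(b)) >> 32);   // high 32 bits of product
}

static int32_t sdiv7(int32_t n) {
  int32_t q = mulhs(n, int32_t(0x92492493u));   // Q = mulhs(n, magic)
  q += n;                                       // d > 0 and magic < 0: add numerator
  q >>= 2;                                      // shift right algebraic by s
  q += int32_t(uint32_t(q) >> 31);              // add one when the quotient is negative
  return q;
}

int main() {
  for (int32_t n = -100000; n <= 100000; ++n)
    assert(sdiv7(n) == n / 7);
  return 0;
}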
+INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", - false, false, true); + false, false, true) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer, + "simple-register-coalescing", "Simple Register Coalescing", + false, false, true) char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; @@ -75,14 +92,14 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); AU.addPreservedID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -124,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->getCopy()) return false; + if (!BValNo->isDefByCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. @@ -218,7 +235,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, continue; LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, true, + SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator()))); } } @@ -266,9 +283,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; - // When BValNo is null, we're looking for a dummy clobber-value for a subreg. - if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy()) - continue; if (BI->start <= AI->start && BI->end > AI->start) return true; if (BI->start > AI->start && BI->start < AI->end) @@ -278,16 +292,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, return false; } -static void -TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { - for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); - i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - } -} - /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with /// IntA being the source and IntB being the dest, thus this defines a value /// number in IntB. 
If the source value number (in IntA) is defined by a @@ -324,8 +328,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!li_->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); LiveInterval &IntA = li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -334,27 +337,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; + VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); + if (!BValNo || !BValNo->isDefByCopy()) + return false; - // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we - // can't process it. - if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = - IntA.FindLiveRangeContaining(CopyIdx.getUseIndex()); // + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + assert(AValNo && "COPY source not live"); - assert(ALR != IntA.end() && "Live range not found!"); - VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be - // tested? - if (AValNo->isPHIDef() || !AValNo->isDefAccurate() || - AValNo->isUnused() || AValNo->hasPHIKill()) + // the optimization. + if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -411,7 +406,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; } - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI); + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. @@ -427,10 +423,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); NewMI->getOperand(OpIdx).setIsKill(); - bool BHasPHIKill = BValNo->hasPHIKill(); - SmallVector<VNInfo*, 4> BDeadValNos; - std::map<SlotIndex, SlotIndex> BExtend; - // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. // A = or A, B // ... @@ -439,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, // C = A<kill> // ... // = B - bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; - if (Extended) - BExtend[ALR->end] = BLR->end; // Update uses of IntA of the specific Val# with IntB. for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), @@ -467,52 +456,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); if (UseMI == CopyMI) continue; - if (UseMO.isKill()) { - if (Extended) - UseMO.setIsKill(false); - } if (!UseMI->isCopy()) continue; if (UseMI->getOperand(0).getReg() != IntB.reg || UseMI->getOperand(0).getSubReg()) continue; - // This copy will become a noop. 
If it's defining a new val#, - // remove that val# as well. However this live range is being - // extended to the end of the existing live range defined by the copy. + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. SlotIndex DefIdx = UseIdx.getDefIndex(); - const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - if (!DLR) + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) continue; - BHasPHIKill |= DLR->valno->hasPHIKill(); - assert(DLR->valno->def == DefIdx); - BDeadValNos.push_back(DLR->valno); - BExtend[DLR->start] = DLR->end; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); JoinedCopies.insert(UseMI); } - // We need to insert a new liverange: [ALR.start, LastUse). It may be we can - // simply extend BLR if CopyMI doesn't end the range. - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), tri_); - }); - - // Remove val#'s defined by copies that will be coalesced away. - for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { - VNInfo *DeadVNI = BDeadValNos[i]; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) { - if (!li_->hasInterval(*AS)) - continue; - LiveInterval &ASLI = li_->getInterval(*AS); - if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def)) - ASLI.removeValNo(ASLR->valno); - } - } - IntB.removeValNo(BDeadValNos[i]); - } - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition // is updated. VNInfo *ValNo = BValNo; @@ -521,30 +482,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - SlotIndex End = AI->end; - std::map<SlotIndex, SlotIndex>::iterator - EI = BExtend.find(End); - if (EI != BExtend.end()) - End = EI->second; - IntB.addRange(LiveRange(AI->start, End, ValNo)); + IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); } - ValNo->setHasPHIKill(BHasPHIKill); - - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), tri_); - dbgs() << "\nShortening: "; - IntA.print(dbgs(), tri_); - }); + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); IntA.removeValNo(AValNo); - - DEBUG({ - dbgs() << " result = "; - IntA.print(dbgs(), tri_); - dbgs() << '\n'; - }); - + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } @@ -644,6 +587,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, + bool preserveSrcInt, unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI) { @@ -652,12 +596,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be - // tested? - if (ValNo->isPHIDef() || !ValNo->isDefAccurate() || - ValNo->isUnused() || ValNo->hasPHIKill()) + // the optimization. 
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; assert(DefMI && "Defining instruction disappeared"); const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) @@ -681,8 +625,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - // If destination register has a sub-register index on it, make sure it mtches - // the instruction register class. + // If destination register has a sub-register index on it, make sure it + // matches the instruction register class. if (DstSubIdx) { const TargetInstrDesc &TID = DefMI->getDesc(); if (TID.getNumDefs() != 1) @@ -699,30 +643,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, RemoveCopyFlag(DstReg, CopyMI); - // If copy kills the source register, find the last use and propagate - // kill. - bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (SrcLR->end == CopyIdx.getDefIndex()) - if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { - checkForDeadDef = true; - } - MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); MachineInstr *NewMI = prior(MII); - if (checkForDeadDef) { - // PR4090 fix: Trim interval failed because there was no use of the - // source interval in this MBB. If the def is in this MBB too then we - // should mark it dead: - if (DefMI->getParent() == MBB) { - DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start.getNextSlot(); - } - } - // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. for (unsigned i = CopyMI->getDesc().getNumOperands(), @@ -734,13 +660,18 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, RemoveCopyFlag(MO.getReg(), CopyMI); } - TransferImplicitOps(CopyMI, NewMI); + NewMI->copyImplicitOps(CopyMI); li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; + + // The source interval can become smaller because we removed a use. + if (preserveSrcInt) + li_->shrinkToUses(&SrcInt); + return true; } @@ -756,6 +687,9 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { unsigned DstReg = CP.getDstReg(); unsigned SubIdx = CP.getSubIdx(); + // Update LiveDebugVariables. 
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx); + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { // A PhysReg copy that won't be coalesced can perhaps be rematerialized @@ -768,7 +702,7 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { UseMI->getOperand(0).getReg() != SrcReg && UseMI->getOperand(0).getReg() != DstReg && !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), + ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, UseMI->getOperand(0).getReg(), 0, UseMI)) continue; } @@ -874,7 +808,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, if (li_->hasInterval(DstReg)) { LiveInterval &LI = li_->getInterval(DstReg); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->getCopy() == CopyMI) + if (LR->valno->def == DefIdx) LR->valno->setCopy(0); } if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) @@ -884,7 +818,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, continue; LiveInterval &LI = li_->getInterval(*AS); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->getCopy() == CopyMI) + if (LR->valno->def == DefIdx) LR->valno->setCopy(0); } } @@ -1044,23 +978,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; } - DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)); // Enforce policies. if (CP.isPhys()) { - DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); + DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n"); // Only coalesce to allocatable physreg. if (!li_->isAllocatable(CP.getDstReg())) { DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); return false; // Not coalescable. } } else { - DEBUG({ - dbgs() << " with reg%" << CP.getDstReg(); - if (CP.getSubIdx()) - dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx()); - dbgs() << " to " << CP.getNewRC()->getName() << "\n"; - }); + DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << " to " << CP.getNewRC()->getName() << "\n"); // Avoid constraining virtual register regclass too much. if (CP.isCrossClass()) { @@ -1114,7 +1044,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) + ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI)) return true; ++numAborts; @@ -1134,7 +1064,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, CP.getDstReg(), 0, CopyMI)) return true; @@ -1317,7 +1247,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? continue; // Never join with a register that has EarlyClobber redefs. 
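The hunks above replace the old null-check on VNI->getCopy() with the new predicate VNInfo::isDefByCopy(). As a rough sketch of that idiom only (assuming the 2011-era llvm/CodeGen/LiveInterval.h API; the helper name is invented for illustration and is not part of this patch), a pass that wants the copy-defined value numbers of an interval can now write:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
using namespace llvm;

// Collect every value number in LI that is defined by a COPY instruction.
static void collectCopyDefinedValues(LiveInterval &LI,
                                     SmallVectorImpl<VNInfo*> &Copies) {
  for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
       I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->isUnused() || !VNI->isDefByCopy())
      continue;                    // not a copy-defined value, skip it
    Copies.push_back(VNI);         // VNI->getCopy() is the defining COPY
  }
}

This reads as a direct replacement for the null-getCopy() comparisons removed in the hunks above; the predicate simply makes the "defined by copy" question explicit.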
@@ -1341,7 +1271,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? continue; // Never join with a register that has EarlyClobber redefs. @@ -1495,9 +1425,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector<CopyRec> &TryAgain) { DEBUG(dbgs() << MBB->getName() << ":\n"); - std::vector<CopyRec> VirtCopies; - std::vector<CopyRec> PhysCopies; - std::vector<CopyRec> ImpDefCopies; + SmallVector<CopyRec, 8> VirtCopies; + SmallVector<CopyRec, 8> PhysCopies; + SmallVector<CopyRec, 8> ImpDefCopies; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E;) { MachineInstr *Inst = MII++; @@ -1690,6 +1620,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); li_ = &getAnalysis<LiveIntervals>(); + ldv_ = &getAnalysis<LiveDebugVariables>(); AA = &getAnalysis<AliasAnalysis>(); loopInfo = &getAnalysis<MachineLoopInfo>(); @@ -1697,6 +1628,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); + if (VerifyCoalescing) + mf_->verify(this, "Before register coalescing"); + for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), E = tri_->regclass_end(); I != E; ++I) allocatableRCRegs_.insert(std::make_pair(*I, @@ -1739,9 +1673,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = false; if (MI->allDefsAreDead()) { - LiveInterval &li = li_->getInterval(SrcReg); - if (!ShortenDeadCopySrcLiveRange(li, MI)) - ShortenDeadCopyLiveRange(li, MI); + if (li_->hasInterval(SrcReg)) { + LiveInterval &li = li_->getInterval(SrcReg); + if (!ShortenDeadCopySrcLiveRange(li, MI)) + ShortenDeadCopyLiveRange(li, MI); + } DoDelete = true; } if (!DoDelete) { @@ -1821,13 +1757,26 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (!MO.isReg() || !MO.isKill()) continue; unsigned reg = MO.getReg(); if (!reg || !li_->hasInterval(reg)) continue; - if (!li_->getInterval(reg).killedAt(DefIdx)) + if (!li_->getInterval(reg).killedAt(DefIdx)) { MO.setIsKill(false); + continue; + } + // When leaving a kill flag on a physreg, check if any subregs should + // remain alive. 
+ if (!TargetRegisterInfo::isPhysicalRegister(reg)) + continue; + for (const unsigned *SR = tri_->getSubRegisters(reg); + unsigned S = *SR; ++SR) + if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) + MI->addRegisterDefined(S, tri_); } } } DEBUG(dump()); + DEBUG(ldv_->dump()); + if (VerifyCoalescing) + mf_->verify(this, "After register coalescing"); return true; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 855bdb98b36c..56703dfa2ddd 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -21,7 +21,7 @@ namespace llvm { class SimpleRegisterCoalescing; - class LiveVariables; + class LiveDebugVariables; class TargetRegisterInfo; class TargetInstrInfo; class VirtRegMap; @@ -44,6 +44,7 @@ namespace llvm { const TargetRegisterInfo* tri_; const TargetInstrInfo* tii_; LiveIntervals *li_; + LiveDebugVariables *ldv_; const MachineLoopInfo* loopInfo; AliasAnalysis *AA; @@ -63,7 +64,9 @@ namespace llvm { public: static char ID; // Pass identifcation, replacement for typeid - SimpleRegisterCoalescing() : MachineFunctionPass(ID) {} + SimpleRegisterCoalescing() : MachineFunctionPass(ID) { + initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); + } struct InstrSlots { enum { @@ -140,8 +143,10 @@ namespace llvm { /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. - bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - unsigned DstSubIdx, MachineInstr *CopyMI); + /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. + bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, + unsigned DstReg, unsigned DstSubIdx, + MachineInstr *CopyMI); /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index b637980f885c..13e1454fa5f3 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -21,15 +21,14 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include <set> using namespace llvm; STATISTIC(NumInvokes, "Number of invokes replaced"); @@ -53,6 +52,7 @@ namespace { Constant *SelectorFn; Constant *ExceptionFn; Constant *CallSiteFn; + Constant *DispatchSetupFn; Value *CallSite; public: @@ -116,6 +116,8 @@ bool SjLjEHPass::doInitialization(Module &M) { SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); + DispatchSetupFn + = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); PersonalityFn = 0; return true; @@ -317,8 +319,12 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Unwinds.push_back(UI); } } - // If we don't have any invokes or unwinds, there's nothing to do. 
- if (Unwinds.empty() && Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + // If we don't have any invokes, there's nothing to do. + if (Invokes.empty()) return false; // Find the eh.selector.*, eh.exception and alloca calls. // @@ -332,6 +338,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<CallInst*,16> EH_Selectors; SmallVector<CallInst*,16> EH_Exceptions; SmallVector<Instruction*,16> JmpbufUpdatePoints; + // Note: Skip the entry block since there's nothing there that interests // us. eh.selector and eh.exception shouldn't ever be there, and we // want to disregard any allocas that are there. @@ -351,228 +358,231 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { } } } + // If we don't have any eh.selector calls, we can't determine the personality // function. Without a personality function, we can't process exceptions. if (!PersonalityFn) return false; - NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); + // We have invokes, so we need to add register/unregister calls to get this + // function onto the global unwind stack. + // + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge we + // spill into a stack location, guaranteeing that there is nothing live across + // the unwind edge. This process also splits all critical edges coming out of + // invoke's. + splitLiveRangesAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be added to the global context list. + unsigned Align = 4; // FIXME: Should be a TLI check? + AllocaInst *FunctionContext = + new AllocaInst(FunctionContextTy, 0, Align, + "fcn_context", F.begin()->begin()); + + Value *Idxs[2]; + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + // We need to also keep around a reference to the call_site field + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 1); + CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "call_site", + EntryBB->getTerminator()); + + // The exception selector comes back in context->data[1] + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "fc_data", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 1); + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exc_selector_gep", + EntryBB->getTerminator()); + // The exception value comes back in context->data[0] + Idxs[1] = Zero; + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exception_gep", + EntryBB->getTerminator()); + + // The result of the eh.selector call will be replaced with a a reference to + // the selector value returned in the function context. We leave the selector + // itself so the EH analysis later can use it. + for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { + CallInst *I = EH_Selectors[i]; + Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); + I->replaceAllUsesWith(SelectorVal); + } - if (!Invokes.empty()) { - // We have invokes, so we need to add register/unregister calls to get - // this function onto the global unwind stack. 
- // - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge - // we spill into a stack location, guaranteeing that there is nothing live - // across the unwind edge. This process also splits all critical edges - // coming out of invoke's. - splitLiveRangesAcrossInvokes(Invokes); - - BasicBlock *EntryBB = F.begin(); - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be added to the global context list. - unsigned Align = 4; // FIXME: Should be a TLI check? - AllocaInst *FunctionContext = - new AllocaInst(FunctionContextTy, 0, Align, - "fcn_context", F.begin()->begin()); - - Value *Idxs[2]; - const Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - // We need to also keep around a reference to the call_site field - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "call_site", - EntryBB->getTerminator()); - - // The exception selector comes back in context->data[1] - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "fc_data", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, - "exc_selector_gep", - EntryBB->getTerminator()); - // The exception value comes back in context->data[0] - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, - "exception_gep", - EntryBB->getTerminator()); - - // The result of the eh.selector call will be replaced with a - // a reference to the selector value returned in the function - // context. We leave the selector itself so the EH analysis later - // can use it. - for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { - CallInst *I = EH_Selectors[i]; - Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); - I->replaceAllUsesWith(SelectorVal); - } - // eh.exception calls are replaced with references to the proper - // location in the context. Unlike eh.selector, the eh.exception - // calls are removed entirely. - for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { - CallInst *I = EH_Exceptions[i]; - // Possible for there to be duplicates, so check to make sure - // the instruction hasn't already been removed. - if (!I->getParent()) continue; - Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - const Type *Ty = Type::getInt8PtrTy(F.getContext()); - Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); - - I->replaceAllUsesWith(Val); - I->eraseFromParent(); - } + // eh.exception calls are replaced with references to the proper location in + // the context. Unlike eh.selector, the eh.exception calls are removed + // entirely. + for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { + CallInst *I = EH_Exceptions[i]; + // Possible for there to be duplicates, so check to make sure the + // instruction hasn't already been removed. 
+ if (!I->getParent()) continue; + Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); + const Type *Ty = Type::getInt8PtrTy(F.getContext()); + Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); + + I->replaceAllUsesWith(Val); + I->eraseFromParent(); + } - // The entry block changes to have the eh.sjlj.setjmp, with a conditional - // branch to a dispatch block for non-zero returns. If we return normally, - // we're not handling an exception and just register the function context - // and continue. - - // Create the dispatch block. The dispatch block is basically a big switch - // statement that goes to all of the invoke landing pads. - BasicBlock *DispatchBlock = - BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - - // Insert a load in the Catch block, and a switch on its value. By default, - // we go to a block that just does an unwind (which is the correct action - // for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); - - Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, - DispatchBlock); - SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), - DispatchBlock); - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "eh.sjlj.setjmp.cont"); - - // Populate the Function Context - // 1. LSDA address - // 2. Personality function address - // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) - - // LSDA address - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "lsda_gep", - EntryBB->getTerminator()); - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); - - Idxs[1] = ConstantInt::get(Int32Ty, 3); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "lsda_gep", + // The entry block changes to have the eh.sjlj.setjmp, with a conditional + // branch to a dispatch block for non-zero returns. If we return normally, + // we're not handling an exception and just register the function context and + // continue. + + // Create the dispatch block. The dispatch block is basically a big switch + // statement that goes to all of the invoke landing pads. + BasicBlock *DispatchBlock = + BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); + + // Add a call to dispatch_setup at the start of the dispatch block. This is + // expanded to any target-specific setup that needs to be done. + Value *SetupArg = + CastInst::Create(Instruction::BitCast, FunctionContext, + Type::getInt8PtrTy(F.getContext()), "", + DispatchBlock); + CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock); + + // Insert a load of the callsite in the dispatch block, and a switch on its + // value. By default, we go to a block that just does an unwind (which is the + // correct action for a standard call). 
+ BasicBlock *UnwindBlock = + BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + + Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, + DispatchBlock); + SwitchInst *DispatchSwitch = + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + DispatchBlock); + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "eh.sjlj.setjmp.cont"); + + // Populate the Function Context + // 1. LSDA address + // 2. Personality function address + // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) + + // LSDA address + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 4); + Value *LSDAFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", + EntryBB->getTerminator()); + new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + + Idxs[1] = ConstantInt::get(Int32Ty, 3); + Value *PersonalityFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + new StoreInst(PersonalityFn, PersonalityFieldPtr, true, + EntryBB->getTerminator()); + + // Save the frame pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 5); + Value *JBufPtr + = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "jbuf_gep", EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); - - // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "jbuf_gep", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", + Idxs[1] = ConstantInt::get(Int32Ty, 0); + Value *FramePtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", + EntryBB->getTerminator()); + + Value *Val = CallInst::Create(FrameAddrFn, + ConstantInt::get(Int32Ty, 0), + "fp", EntryBB->getTerminator()); + new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + + // Save the stack pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *StackPtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", + EntryBB->getTerminator()); + + Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); + new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + + // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. + Value *SetjmpArg = + CastInst::Create(Instruction::BitCast, JBufPtr, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, + "dispatch", + EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher. + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, DispatchVal, Zero, + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. 
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); + + // Register the function context and make sure it's known to not throw + CallInst *Register = + CallInst::Create(RegisterFn, FunctionContext, "", + ContBlock->getTerminator()); + Register->setDoesNotThrow(); + + // At this point, we are all set up, update the invoke instructions to mark + // their call_site values, and fill in the dispatch switch accordingly. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); + + // Mark call instructions that aren't nounwind as no-action (call_site == + // -1). Skip the entry block, as prior to then, no function context has been + // created for this function and any unexpected exceptions thrown will go + // directly to the caller's context, which is what we want anyway, so no need + // to do anything here. + for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore calls to the EH builtins (eh.selector, eh.exception) + Constant *Callee = CI->getCalledFunction(); + if (Callee != SelectorFn && Callee != ExceptionFn + && !CI->doesNotThrow()) + insertCallSiteStore(CI, -1, CallSite); + } + } - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); - - // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", - EntryBB->getTerminator()); - - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); - - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "dispatch", - EntryBB->getTerminator()); - // check the return value of the setjmp. non-zero goes to dispatcher. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, DispatchVal, Zero, - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); - - // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FunctionContext, "", - ContBlock->getTerminator()); - Register->setDoesNotThrow(); - - // At this point, we are all set up, update the invoke instructions - // to mark their call_site values, and fill in the dispatch switch - // accordingly. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - - // Mark call instructions that aren't nounwind as no-action - // (call_site == -1). Skip the entry block, as prior to then, no function - // context has been created for this function and any unexpected exceptions - // thrown will go directly to the caller's context, which is what we want - // anyway, so no need to do anything here. 
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Ignore calls to the EH builtins (eh.selector, eh.exception) - Constant *Callee = CI->getCalledFunction(); - if (Callee != SelectorFn && Callee != ExceptionFn - && !CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); - } - } - - // Replace all unwinds with a branch to the unwind handler. - // ??? Should this ever happen with sjlj exceptions? - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - - // Following any allocas not in the entry block, update the saved SP - // in the jmpbuf to the new value. - for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { - Instruction *AI = JmpbufUpdatePoints[i]; - Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(AI); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); - } + // Replace all unwinds with a branch to the unwind handler. + // ??? Should this ever happen with sjlj exceptions? + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindBlock, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } - // Finally, for any returns from this function, if this function contains an - // invoke, add a call to unregister the function context. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) - CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { + Instruction *AI = JmpbufUpdatePoints[i]; + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(AI); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); } + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) + CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + return true; } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 1bc148f160bc..6e3fa90e4341 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -41,7 +41,7 @@ namespace { char SlotIndexes::ID = 0; INITIALIZE_PASS(SlotIndexes, "slotindexes", - "Slot index numbering", false, false); + "Slot index numbering", false, false) IndexListEntry* IndexListEntry::getEmptyKeyEntry() { return &*IndexListEntryEmptyKey; @@ -61,7 +61,6 @@ void SlotIndexes::releaseMemory() { mi2iMap.clear(); mbb2IdxMap.clear(); idx2MBBMap.clear(); - terminatorGaps.clear(); clearList(); } @@ -112,13 +111,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { if (mi->isDebugValue()) continue; - if (miItr == mbb->getFirstTerminator()) { - push_back(createEntry(0, index)); - terminatorGaps.insert( - std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += SlotIndex::NUM; - } - // Insert a store index for the instr. 
push_back(createEntry(mi, index)); @@ -135,15 +127,12 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { index += (Slots + 1) * SlotIndex::NUM; } - if (mbb->getFirstTerminator() == mbb->end()) { - push_back(createEntry(0, index)); - terminatorGaps.insert( - std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += SlotIndex::NUM; - } + // We insert two blank instructions between basic blocks. + // One to represent live-out registers and one to represent live-ins. + push_back(createEntry(0, index)); + index += SlotIndex::NUM; - // One blank instruction at the end. - push_back(createEntry(0, index)); + push_back(createEntry(0, index)); SlotIndex blockEndIndex(back(), SlotIndex::LOAD); mbb2IdxMap.insert( @@ -169,6 +158,7 @@ void SlotIndexes::renumberIndexes() { // resulting numbering will match what would have been generated by the // pass during the initial numbering of the function if the new instructions // had been present. + DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); functionSize = 0; unsigned index = 0; @@ -179,7 +169,7 @@ void SlotIndexes::renumberIndexes() { curEntry->setIndex(index); if (curEntry->getInstr() == 0) { - // MBB start entry or terminator gap. Just step index by 1. + // MBB start entry. Just step index by 1. index += SlotIndex::NUM; } else { @@ -214,11 +204,10 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { - os << entry().getIndex(); - if (isPHI()) - os << "*"; + if (isValid()) + os << entry().getIndex() << "LudS"[getSlot()]; else - os << "LudS"[getSlot()]; + os << "invalid"; } // Dump a SlotIndex to stderr. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp new file mode 100644 index 000000000000..9c0bf1629a14 --- /dev/null +++ b/lib/CodeGen/SpillPlacement.cpp @@ -0,0 +1,330 @@ +//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the spill code placement analysis. +// +// Each edge bundle corresponds to a node in a Hopfield network. Constraints on +// basic blocks are weighted by the block frequency and added to become the node +// bias. +// +// Transparent basic blocks have the variable live through, but don't care if it +// is spilled or in a register. These blocks become connections in the Hopfield +// network, again weighted by block frequency. +// +// The Hopfield network minimizes (possibly locally) its energy function: +// +// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b ) +// +// The energy function represents the expected spill code execution frequency, +// or the cost of spilling. This is a Lyapunov function which never increases +// when a node is updated. It is guaranteed to converge to a local minimum. 
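As a point of reference for the header comment above, the update step that locally minimizes E is just the sign of a node's bias plus its weighted neighbour sum. A minimal standalone sketch follows, with invented names and plain std containers rather than the Node/LinkVector types defined later in this file (the real Node::update() below refines this by snapping the result to a small dead zone around zero):

#include <utility>
#include <vector>

// One Hopfield step for a single node: Bias is B_n, and each link carries
// (F_b, V_m), the edge weight and the current value of the linked node.
static float hopfieldStep(float Bias,
                          const std::vector<std::pair<float, float> > &Links) {
  float Sum = Bias;
  for (unsigned i = 0, e = Links.size(); i != e; ++i)
    Sum += Links[i].first * Links[i].second;  // F_b * V_m
  return Sum > 0.0f ? 1.0f : -1.0f;           // new V_n = sign(Sum)
}

Because each such step never increases E, repeatedly updating nodes converges to a local minimum, which is what the iterate() routine added further down exploits.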
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spillplacement" +#include "SpillPlacement.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +char SpillPlacement::ID = 0; +INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(EdgeBundles) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) + +char &llvm::SpillPlacementID = SpillPlacement::ID; + +void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<EdgeBundles>(); + AU.addRequiredTransitive<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Node - Each edge bundle corresponds to a Hopfield node. +/// +/// The node contains precomputed frequency data that only depends on the CFG, +/// but Bias and Links are computed each time placeSpills is called. +/// +/// The node Value is positive when the variable should be in a register. The +/// value can change when linked nodes change, but convergence is very fast +/// because all weights are positive. +/// +struct SpillPlacement::Node { + /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle. + /// Ideally, these two numbers should be identical, but inaccuracies in the + /// block frequency estimates means that we need to normalize ingoing and + /// outgoing frequencies separately so they are commensurate. + float Frequency[2]; + + /// Bias - Normalized contributions from non-transparent blocks. + /// A bundle connected to a MustSpill block has a huge negative bias, + /// otherwise it is a number in the range [-2;2]. + float Bias; + + /// Value - Output value of this node computed from the Bias and links. + /// This is always in the range [-1;1]. A positive number means the variable + /// should go in a register through this bundle. + float Value; + + typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector; + + /// Links - (Weight, BundleNo) for all transparent blocks connecting to other + /// bundles. The weights are all positive and add up to at most 2, weights + /// from ingoing and outgoing nodes separately add up to a most 1. The weight + /// sum can be less than 2 when the variable is not live into / out of some + /// connected basic blocks. + LinkVector Links; + + /// preferReg - Return true when this node prefers to be in a register. + bool preferReg() const { + // Undecided nodes (Value==0) go on the stack. + return Value > 0; + } + + /// mustSpill - Return True if this node is so biased that it must spill. + bool mustSpill() const { + // Actually, we must spill if Bias < sum(weights). + // It may be worth it to compute the weight sum here? + return Bias < -2.0f; + } + + /// Node - Create a blank Node. + Node() { + Frequency[0] = Frequency[1] = 0; + } + + /// clear - Reset per-query data, but preserve frequencies that only depend on + // the CFG. + void clear() { + Bias = Value = 0; + Links.clear(); + } + + /// addLink - Add a link to bundle b with weight w. + /// out=0 for an ingoing link, and 1 for an outgoing link. 
+ void addLink(unsigned b, float w, bool out) { + // Normalize w relative to all connected blocks from that direction. + w /= Frequency[out]; + + // There can be multiple links to the same bundle, add them up. + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) + if (I->second == b) { + I->first += w; + return; + } + // This must be the first link to b. + Links.push_back(std::make_pair(w, b)); + } + + /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. + void addBias(float w, bool out) { + // Normalize w relative to all connected blocks from that direction. + w /= Frequency[out]; + Bias += w; + } + + /// update - Recompute Value from Bias and Links. Return true when node + /// preference changes. + bool update(const Node nodes[]) { + // Compute the weighted sum of inputs. + float Sum = Bias; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) + Sum += I->first * nodes[I->second].Value; + + // The weighted sum is going to be in the range [-2;2]. Ideally, we should + // simply set Value = sign(Sum), but we will add a dead zone around 0 for + // two reasons: + // 1. It avoids arbitrary bias when all links are 0 as is possible during + // initial iterations. + // 2. It helps tame rounding errors when the links nominally sum to 0. + const float Thres = 1e-4f; + bool Before = preferReg(); + if (Sum < -Thres) + Value = -1; + else if (Sum > Thres) + Value = 1; + else + Value = 0; + return Before != preferReg(); + } +}; + +bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + bundles = &getAnalysis<EdgeBundles>(); + loops = &getAnalysis<MachineLoopInfo>(); + + assert(!nodes && "Leaking node array"); + nodes = new Node[bundles->getNumBundles()]; + + // Compute total ingoing and outgoing block frequencies for all bundles. + for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { + float Freq = getBlockFrequency(I); + unsigned Num = I->getNumber(); + nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq; + nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq; + } + + // We never change the function. + return false; +} + +void SpillPlacement::releaseMemory() { + delete[] nodes; + nodes = 0; +} + +/// activate - mark node n as active if it wasn't already. +void SpillPlacement::activate(unsigned n) { + if (ActiveNodes->test(n)) + return; + ActiveNodes->set(n); + nodes[n].clear(); +} + + +/// prepareNodes - Compute node biases and weights from a set of constraints. +/// Set a bit in NodeMask for each active node. +void SpillPlacement:: +prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) { + for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(), + E = LiveBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number); + float Freq = getBlockFrequency(MBB); + + // Is this a transparent block? Link ingoing and outgoing bundles. + if (I->Entry == DontCare && I->Exit == DontCare) { + unsigned ib = bundles->getBundle(I->Number, 0); + unsigned ob = bundles->getBundle(I->Number, 1); + + // Ignore self-loops. + if (ib == ob) + continue; + activate(ib); + activate(ob); + nodes[ib].addLink(ob, Freq, 1); + nodes[ob].addLink(ib, Freq, 0); + continue; + } + + // This block is not transparent, but it can still add bias. + const float Bias[] = { + 0, // DontCare, + 1, // PrefReg, + -1, // PrefSpill + -HUGE_VALF // MustSpill + }; + + // Live-in to block? 
+ if (I->Entry != DontCare) { + unsigned ib = bundles->getBundle(I->Number, 0); + activate(ib); + nodes[ib].addBias(Freq * Bias[I->Entry], 1); + } + + // Live-out from block? + if (I->Exit != DontCare) { + unsigned ob = bundles->getBundle(I->Number, 1); + activate(ob); + nodes[ob].addBias(Freq * Bias[I->Exit], 0); + } + } +} + +/// iterate - Repeatedly update the Hopfield nodes until stability or the +/// maximum number of iterations is reached. +/// @param Linked - Numbers of linked nodes that need updating. +void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { + if (Linked.empty()) + return; + + // Run up to 10 iterations. The edge bundle numbering is closely related to + // basic block numbering, so there is a strong tendency towards chains of + // linked nodes with sequential numbers. By scanning the linked nodes + // backwards and forwards, we make it very likely that a single node can + // affect the entire network in a single iteration. That means very fast + // convergence, usually in a single iteration. + for (unsigned iteration = 0; iteration != 10; ++iteration) { + // Scan backwards, skipping the last node which was just updated. + bool Changed = false; + for (SmallVectorImpl<unsigned>::const_reverse_iterator I = + llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { + unsigned n = *I; + bool C = nodes[n].update(nodes); + Changed |= C; + } + if (!Changed) + return; + + // Scan forwards, skipping the first node which was just updated. + Changed = false; + for (SmallVectorImpl<unsigned>::const_iterator I = + llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { + unsigned n = *I; + bool C = nodes[n].update(nodes); + Changed |= C; + } + if (!Changed) + return; + } +} + +bool +SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, + BitVector &RegBundles) { + // Reuse RegBundles as our ActiveNodes vector. + ActiveNodes = &RegBundles; + ActiveNodes->clear(); + ActiveNodes->resize(bundles->getNumBundles()); + + // Compute active nodes, links and biases. + prepareNodes(LiveBlocks); + + // Update all active nodes, and find the ones that are actually linked to + // something so their value may change when iterating. + SmallVector<unsigned, 8> Linked; + for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) + Linked.push_back(n); + } + + // Iterate the network to convergence. + iterate(Linked); + + // Write preferences back to RegBundles. + bool Perfect = true; + for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) + if (!nodes[n].preferReg()) { + RegBundles.reset(n); + Perfect = false; + } + return Perfect; +} + +/// getBlockFrequency - Return our best estimate of the block frequency which is +/// the expected number of block executions per function invocation. +float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) { + // Use the unnormalized spill weight for real block frequencies. 
+ return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB)); +} + diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h new file mode 100644 index 000000000000..ef2d516cdce7 --- /dev/null +++ b/lib/CodeGen/SpillPlacement.h @@ -0,0 +1,108 @@ +//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis computes the optimal spill code placement between basic blocks. +// +// The runOnMachineFunction() method only precomputes some profiling information +// about the CFG. The real work is done by placeSpills() which is called by the +// register allocator. +// +// Given a variable that is live across multiple basic blocks, and given +// constraints on the basic blocks where the variable is live, determine which +// edge bundles should have the variable in a register and which edge bundles +// should have the variable in a stack slot. +// +// The returned bit vector can be used to place optimal spill code at basic +// block entries and exits. Spill code placement inside a basic block is not +// considered. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H +#define LLVM_CODEGEN_SPILLPLACEMENT_H + +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class BitVector; +class EdgeBundles; +class MachineBasicBlock; +class MachineLoopInfo; +template <typename> class SmallVectorImpl; + +class SpillPlacement : public MachineFunctionPass { + struct Node; + const MachineFunction *MF; + const EdgeBundles *bundles; + const MachineLoopInfo *loops; + Node *nodes; + + // Nodes that are active in the current computation. Owned by the placeSpills + // caller. + BitVector *ActiveNodes; + +public: + static char ID; // Pass identification, replacement for typeid. + + SpillPlacement() : MachineFunctionPass(ID), nodes(0) {} + ~SpillPlacement() { releaseMemory(); } + + /// BorderConstraint - A basic block has separate constraints for entry and + /// exit. + enum BorderConstraint { + DontCare, ///< Block doesn't care / variable not live. + PrefReg, ///< Block entry/exit prefers a register. + PrefSpill, ///< Block entry/exit prefers a stack slot. + MustSpill ///< A register is impossible, variable must be spilled. + }; + + /// BlockConstraint - Entry and exit constraints for a basic block. + struct BlockConstraint { + unsigned Number; ///< Basic block number (from MBB::getNumber()). + BorderConstraint Entry : 8; ///< Constraint on block entry. + BorderConstraint Exit : 8; ///< Constraint on block exit. + }; + + /// placeSpills - Compute the optimal spill code placement given the + /// constraints. No MustSpill constraints will be violated, and the smallest + /// possible number of PrefX constraints will be violated, weighted by + /// expected execution frequencies. + /// @param LiveBlocks Constraints for blocks that have the variable live in or + /// live out. DontCare/DontCare means the variable is live + /// through the block. DontCare/X means the variable is live + /// out, but not live in. + /// @param RegBundles Bit vector to receive the edge bundles where the + /// variable should be kept in a register. 
Each bit + /// corresponds to an edge bundle, a set bit means the + /// variable should be kept in a register through the + /// bundle. A clear bit means the variable should be + /// spilled. + /// @return True if a perfect solution was found, allowing the variable to be + /// in a register through all relevant bundles. + bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, + BitVector &RegBundles); + + /// getBlockFrequency - Return the estimated block execution frequency per + /// function invocation. + float getBlockFrequency(const MachineBasicBlock*); + +private: + virtual bool runOnMachineFunction(MachineFunction&); + virtual void getAnalysisUsage(AnalysisUsage&) const; + virtual void releaseMemory(); + + void activate(unsigned); + void prepareNodes(const SmallVectorImpl<BlockConstraint>&); + void iterate(const SmallVectorImpl<unsigned>&); +}; + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 59d5ab33c994..fd385824aff9 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -12,6 +12,7 @@ #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -28,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, splitting, inline_ }; + enum SpillerName { trivial, standard, inline_ }; } static cl::opt<SpillerName> @@ -37,7 +38,6 @@ spillerOpt("spiller", cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), clEnumVal(standard, "default spiller"), - clEnumVal(splitting, "splitting spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), cl::init(standard)); @@ -80,7 +80,7 @@ protected: assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); - assert(!li->isStackSlot() && + assert(!TargetRegisterInfo::isStackSlot(li->reg) && "Trying to spill a stack slot."); DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); @@ -144,7 +144,7 @@ protected: vrm->addSpillSlotUse(ss, loadInstr); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = - newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); + newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } @@ -158,7 +158,7 @@ protected: vrm->addSpillSlotUse(ss, storeInstr); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = - newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); + newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } @@ -182,7 +182,7 @@ public: void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &) { + const SmallVectorImpl<LiveInterval*> &) { // Ignore spillIs - we don't use it. trivialSpillEverywhere(li, newIntervals); } @@ -195,315 +195,42 @@ namespace { /// Falls back on LiveIntervals::addIntervalsForSpills. 
class StandardSpiller : public Spiller { protected: + MachineFunction *mf; LiveIntervals *lis; + LiveStacks *lss; MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : lis(&pass.getAnalysis<LiveIntervals>()), + : mf(&mf), + lis(&pass.getAnalysis<LiveIntervals>()), + lss(&pass.getAnalysis<LiveStacks>()), loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), vrm(&vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) { + const SmallVectorImpl<LiveInterval*> &spillIs) { std::vector<LiveInterval*> added = lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); newIntervals.insert(newIntervals.end(), added.begin(), added.end()); - } -}; - -} // end anonymous namespace - -namespace { - -/// When a call to spill is placed this spiller will first try to break the -/// interval up into its component values (one new interval per value). -/// If this fails, or if a call is placed to spill a previously split interval -/// then the spiller falls back on the standard spilling mechanism. -class SplittingSpiller : public StandardSpiller { -public: - SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : StandardSpiller(pass, mf, vrm) { - mri = &mf.getRegInfo(); - tii = mf.getTarget().getInstrInfo(); - tri = mf.getTarget().getRegisterInfo(); - } - void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) { - if (worthTryingToSplit(li)) - tryVNISplit(li); - else - StandardSpiller::spill(li, newIntervals, spillIs); + // Update LiveStacks. + int SS = vrm->getStackSlot(li->reg); + if (SS == VirtRegMap::NO_STACK_SLOT) + return; + const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg); + LiveInterval &SI = lss->getOrCreateInterval(SS, RC); + if (!SI.hasAtLeastOneValue()) + SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); + SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0)); } - -private: - - MachineRegisterInfo *mri; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - DenseSet<LiveInterval*> alreadySplit; - - bool worthTryingToSplit(LiveInterval *li) const { - return (!alreadySplit.count(li) && li->getNumValNums() > 1); - } - - /// Try to break a LiveInterval into its component values. - std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) { - - DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n"); - - std::vector<LiveInterval*> added; - SmallVector<VNInfo*, 4> vnis; - - std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis)); - - for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(), - vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) { - VNInfo *vni = *vniItr; - - // Skip unused VNIs. - if (vni->isUnused()) - continue; - - DEBUG(dbgs() << " Extracted Val #" << vni->id << " as "); - LiveInterval *splitInterval = extractVNI(li, vni); - - if (splitInterval != 0) { - DEBUG(dbgs() << *splitInterval << "\n"); - added.push_back(splitInterval); - alreadySplit.insert(splitInterval); - } else { - DEBUG(dbgs() << "0\n"); - } - } - - DEBUG(dbgs() << "Original LI: " << *li << "\n"); - - // If there original interval still contains some live ranges - // add it to added and alreadySplit. - if (!li->empty()) { - added.push_back(li); - alreadySplit.insert(li); - } - - return added; - } - - /// Extract the given value number from the interval. 
- LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const { - assert(vni->isDefAccurate() || vni->isPHIDef()); - - // Create a new vreg and live interval, copy VNI ranges over. - const TargetRegisterClass *trc = mri->getRegClass(li->reg); - unsigned newVReg = mri->createVirtualRegister(trc); - vrm->grow(); - LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); - VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator()); - - // Start by copying all live ranges in the VN to the new interval. - for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end(); - rItr != rEnd; ++rItr) { - if (rItr->valno == vni) { - newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI)); - } - } - - // Erase the old VNI & ranges. - li->removeValNo(vni); - - // Collect all current uses of the register belonging to the given VNI. - // We'll use this to rename the register after we've dealt with the def. - std::set<MachineInstr*> uses; - for (MachineRegisterInfo::use_iterator - useItr = mri->use_begin(li->reg), useEnd = mri->use_end(); - useItr != useEnd; ++useItr) { - uses.insert(&*useItr); - } - - // Process the def instruction for this VNI. - if (newVNI->isPHIDef()) { - // Insert a copy at the start of the MBB. The range proceeding the - // copy will be attached to the original LiveInterval. - MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); - MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(), - tii->get(TargetOpcode::COPY), newVReg) - .addReg(li->reg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), - 0, false, lis->getVNInfoAllocator()); - phiDefVNI->setIsPHIDef(true); - li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); - LiveRange *oldPHIDefRange = - newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); - - // If the old phi def starts in the middle of the range chop it up. - if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) { - LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end, - oldPHIDefRange->valno); - oldPHIDefRange->end = lis->getMBBStartIdx(defMBB); - newLI->addRange(oldPHIDefRange2); - } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) { - // Otherwise if it's at the start of the range just trim it. - oldPHIDefRange->start = copyIdx.getDefIndex(); - } else { - assert(false && "PHI def range doesn't cover PHI def?"); - } - - newVNI->def = copyIdx.getDefIndex(); - newVNI->setCopy(copyMI); - newVNI->setIsPHIDef(false); // not a PHI def anymore. - newVNI->setIsDefAccurate(true); - } else { - // non-PHI def. Rename the def. If it's two-addr that means renaming the - // use and inserting a new copy too. - MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); - // We'll rename this now, so we can remove it from uses. 
- uses.erase(defInst); - unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg); - bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx), - twoAddrUseIsUndef = false; - - for (unsigned i = 0; i < defInst->getNumOperands(); ++i) { - MachineOperand &mo = defInst->getOperand(i); - if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) { - mo.setReg(newVReg); - if (isTwoAddr && mo.isUse() && mo.isUndef()) - twoAddrUseIsUndef = true; - } - } - - SlotIndex defIdx = lis->getInstructionIndex(defInst); - newVNI->def = defIdx.getDefIndex(); - - if (isTwoAddr && !twoAddrUseIsUndef) { - MachineBasicBlock *defMBB = defInst->getParent(); - MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(), - tii->get(TargetOpcode::COPY), newVReg) - .addReg(li->reg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - LiveRange *origUseRange = - li->getLiveRangeContaining(newVNI->def.getUseIndex()); - origUseRange->end = copyIdx.getDefIndex(); - VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, - true, lis->getVNInfoAllocator()); - LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); - newLI->addRange(copyRange); - } - } - - for (std::set<MachineInstr*>::iterator - usesItr = uses.begin(), usesEnd = uses.end(); - usesItr != usesEnd; ++usesItr) { - MachineInstr *useInst = *usesItr; - SlotIndex useIdx = lis->getInstructionIndex(useInst); - LiveRange *useRange = - newLI->getLiveRangeContaining(useIdx.getUseIndex()); - - // If this use doesn't belong to the new interval skip it. - if (useRange == 0) - continue; - - // This use doesn't belong to the VNI, skip it. - if (useRange->valno != newVNI) - continue; - - // Check if this instr is two address. - unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); - bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); - - // Rename uses (and defs for two-address instrs). - for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { - MachineOperand &mo = useInst->getOperand(i); - if (mo.isReg() && (mo.isUse() || isTwoAddress) && - (mo.getReg() == li->reg)) { - mo.setReg(newVReg); - } - } - - // If this is a two address instruction we've got some extra work to do. - if (isTwoAddress) { - // We modified the def operand, so we need to copy back to the original - // reg. - MachineBasicBlock *useMBB = useInst->getParent(); - MachineBasicBlock::iterator useItr(useInst); - MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(), - tii->get(TargetOpcode::COPY), newVReg) - .addReg(li->reg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - - // Change the old two-address defined range & vni to start at - // (and be defined by) the copy. - LiveRange *origDefRange = - li->getLiveRangeContaining(useIdx.getDefIndex()); - origDefRange->start = copyIdx.getDefIndex(); - origDefRange->valno->def = copyIdx.getDefIndex(); - origDefRange->valno->setCopy(copyMI); - - // Insert a new range & vni for the two-address-to-copy value. This - // will be attached to the new live interval. - VNInfo *copyVNI = - newLI->getNextValue(useIdx.getDefIndex(), 0, true, - lis->getVNInfoAllocator()); - LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI); - newLI->addRange(copyRange); - } - } - - // Iterate over any PHI kills - we'll need to insert new copies for them. 
- for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end(); - LRI != LRE; ++LRI) { - if (LRI->valno != newVNI || LRI->end.isPHI()) - continue; - SlotIndex killIdx = LRI->end; - MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx); - MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(), - DebugLoc(), tii->get(TargetOpcode::COPY), - li->reg) - .addReg(newVReg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - - // Save the current end. We may need it to add a new range if the - // current range runs of the end of the MBB. - SlotIndex newKillRangeEnd = LRI->end; - LRI->end = copyIdx.getDefIndex(); - - if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) { - assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) && - "PHI kill range doesn't reach kill-block end. Not sane."); - newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB), - newKillRangeEnd, newVNI)); - } - - VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), - copyMI, true, - lis->getVNInfoAllocator()); - newKillVNI->setHasPHIKill(true); - li->addRange(LiveRange(copyIdx.getDefIndex(), - lis->getMBBEndIdx(killMBB), - newKillVNI)); - } - newVNI->setHasPHIKill(false); - - return newLI; - } - }; } // end anonymous namespace - -namespace llvm { -Spiller *createInlineSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm); -} - llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { @@ -511,7 +238,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); case standard: return new StandardSpiller(pass, mf, vrm); - case splitting: return new SplittingSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 59bc0ec6ae70..f017583494ed 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -10,14 +10,13 @@ #ifndef LLVM_CODEGEN_SPILLER_H #define LLVM_CODEGEN_SPILLER_H -#include "llvm/ADT/SmallVector.h" - namespace llvm { class LiveInterval; class MachineFunction; class MachineFunctionPass; class SlotIndex; + template <typename T> class SmallVectorImpl; class VirtRegMap; /// Spiller interface. @@ -37,7 +36,7 @@ namespace llvm { /// @param newIntervals The newly created intervals will be appended here. virtual void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) = 0; + const SmallVectorImpl<LiveInterval*> &spillIs) = 0; }; @@ -45,6 +44,13 @@ namespace llvm { Spiller* createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm); + + /// Create and return a spiller that will insert spill code directly instead + /// of deferring though VirtRegMap. 
+ Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); + } #endif diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 29474f0d5512..5663936bf3aa 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "splitter" +#define DEBUG_TYPE "regalloc" #include "SplitKit.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -36,371 +37,231 @@ AllowSplit("spiller-splits-edges", // Split Analysis //===----------------------------------------------------------------------===// -SplitAnalysis::SplitAnalysis(const MachineFunction &mf, +SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli) - : mf_(mf), - lis_(lis), - loops_(mli), - tii_(*mf.getTarget().getInstrInfo()), - curli_(0) {} + : MF(vrm.getMachineFunction()), + VRM(vrm), + LIS(lis), + Loops(mli), + TII(*MF.getTarget().getInstrInfo()), + CurLI(0) {} void SplitAnalysis::clear() { - usingInstrs_.clear(); - usingBlocks_.clear(); - usingLoops_.clear(); - curli_ = 0; + UseSlots.clear(); + UsingInstrs.clear(); + UsingBlocks.clear(); + LiveBlocks.clear(); + CurLI = 0; } bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { MachineBasicBlock *T, *F; SmallVector<MachineOperand, 4> Cond; - return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); + return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); } -/// analyzeUses - Count instructions, basic blocks, and loops using curli. +/// analyzeUses - Count instructions, basic blocks, and loops using CurLI. void SplitAnalysis::analyzeUses() { - const MachineRegisterInfo &MRI = mf_.getRegInfo(); - for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg); - MachineInstr *MI = I.skipInstruction();) { - if (MI->isDebugValue() || !usingInstrs_.insert(MI)) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg), + E = MRI.reg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + if (MO.isUse() && MO.isUndef()) continue; - MachineBasicBlock *MBB = MI->getParent(); - if (usingBlocks_[MBB]++) + MachineInstr *MI = MO.getParent(); + if (MI->isDebugValue() || !UsingInstrs.insert(MI)) continue; - if (MachineLoop *Loop = loops_.getLoopFor(MBB)) - usingLoops_[Loop]++; + UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex()); + MachineBasicBlock *MBB = MI->getParent(); + UsingBlocks[MBB]++; } + array_pod_sort(UseSlots.begin(), UseSlots.end()); + calcLiveBlockInfo(); DEBUG(dbgs() << " counted " - << usingInstrs_.size() << " instrs, " - << usingBlocks_.size() << " blocks, " - << usingLoops_.size() << " loops.\n"); + << UsingInstrs.size() << " instrs, " + << UsingBlocks.size() << " blocks.\n"); } -/// removeUse - Update statistics by noting that MI no longer uses curli. -void SplitAnalysis::removeUse(const MachineInstr *MI) { - if (!usingInstrs_.erase(MI)) +/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks +/// where CurLI is live. 
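The calcLiveBlockInfo routine that follows summarizes, for every basic block the current interval touches, whether the value is live-in and live-out and where its first and last uses fall. A compilable sketch of that per-block summary under simplified assumptions (plain unsigned indices, a sorted segment list, and illustrative names rather than the real BlockInfo/SlotIndex types):

#include <algorithm>
#include <vector>

struct Segment { unsigned start, end; };   // half-open [start, end)

struct BlockSummary {
  bool LiveIn = false, LiveOut = false;
  bool HasUses = false;
  unsigned FirstUse = 0, LastUse = 0;
};

BlockSummary summarizeBlock(unsigned Start, unsigned Stop,
                            const std::vector<Segment> &Live,
                            const std::vector<unsigned> &Uses) {
  BlockSummary S;
  for (const Segment &Seg : Live) {
    if (Seg.end <= Start || Seg.start >= Stop)
      continue;                               // segment does not touch block
    if (Seg.start <= Start) S.LiveIn = true;  // live across the block entry
    if (Seg.end >= Stop)    S.LiveOut = true; // live across the block exit
  }
  auto Lo = std::lower_bound(Uses.begin(), Uses.end(), Start);
  auto Hi = std::lower_bound(Uses.begin(), Uses.end(), Stop);
  if (Lo != Hi) {
    S.HasUses = true;
    S.FirstUse = *Lo;          // first use inside [Start, Stop)
    S.LastUse = *(Hi - 1);     // last use inside [Start, Stop)
  }
  return S;
}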
+void SplitAnalysis::calcLiveBlockInfo() { + if (CurLI->empty()) return; - // Decrement MBB count. - const MachineBasicBlock *MBB = MI->getParent(); - BlockCountMap::iterator bi = usingBlocks_.find(MBB); - assert(bi != usingBlocks_.end() && "MBB missing"); - assert(bi->second && "0 count in map"); - if (--bi->second) - return; - // No more uses in MBB. - usingBlocks_.erase(bi); + LiveInterval::const_iterator LVI = CurLI->begin(); + LiveInterval::const_iterator LVE = CurLI->end(); + + SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE; + UseI = UseSlots.begin(); + UseE = UseSlots.end(); + + // Loop over basic blocks where CurLI is live. + MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start); + for (;;) { + BlockInfo BI; + BI.MBB = MFI; + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + // The last split point is the latest possible insertion point that dominates + // all successor blocks. If interference reaches LastSplitPoint, it is not + // possible to insert a split or reload that makes CurLI live in the + // outgoing bundle. + MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB); + if (LSP == BI.MBB->end()) + BI.LastSplitPoint = Stop; + else + BI.LastSplitPoint = LIS.getInstructionIndex(LSP); + + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= Start; + if (!BI.LiveIn) + BI.Def = LVI->start; + + // Find the first and last uses in the block. + BI.Uses = hasUses(MFI); + if (BI.Uses && UseI != UseE) { + BI.FirstUse = *UseI; + assert(BI.FirstUse >= Start); + do ++UseI; + while (UseI != UseE && *UseI < Stop); + BI.LastUse = UseI[-1]; + assert(BI.LastUse < Stop); + } - // Decrement loop count. - MachineLoop *Loop = loops_.getLoopFor(MBB); - if (!Loop) - return; - LoopCountMap::iterator li = usingLoops_.find(Loop); - assert(li != usingLoops_.end() && "Loop missing"); - assert(li->second && "0 count in map"); - if (--li->second) - return; - // No more blocks in Loop. - usingLoops_.erase(li); -} + // Look for gaps in the live range. + bool hasGap = false; + BI.LiveOut = true; + while (LVI->end < Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= Stop) { + BI.Kill = LastStop; + BI.LiveOut = false; + break; + } + if (LastStop < LVI->start) { + hasGap = true; + BI.Kill = LastStop; + BI.Def = LVI->start; + } + } -// Get three sets of basic blocks surrounding a loop: Blocks inside the loop, -// predecessor blocks, and exit blocks. -void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) { - Blocks.clear(); - - // Blocks in the loop. - Blocks.Loop.insert(Loop->block_begin(), Loop->block_end()); - - // Predecessor blocks. - const MachineBasicBlock *Header = Loop->getHeader(); - for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(), - E = Header->pred_end(); I != E; ++I) - if (!Blocks.Loop.count(*I)) - Blocks.Preds.insert(*I); - - // Exit blocks. - for (MachineLoop::block_iterator I = Loop->block_begin(), - E = Loop->block_end(); I != E; ++I) { - const MachineBasicBlock *MBB = *I; - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if (!Blocks.Loop.count(*SI)) - Blocks.Exits.insert(*SI); - } -} + // Don't set LiveThrough when the block has a gap. + BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; + LiveBlocks.push_back(BI); -/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in -/// and around the Loop. 
-SplitAnalysis::LoopPeripheralUse SplitAnalysis:: -analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) { - LoopPeripheralUse use = ContainedInLoop; - for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); - I != E; ++I) { - const MachineBasicBlock *MBB = I->first; - // Is this a peripheral block? - if (use < MultiPeripheral && - (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) { - if (I->second > 1) use = MultiPeripheral; - else use = SinglePeripheral; - continue; - } - // Is it a loop block? - if (Blocks.Loop.count(MBB)) - continue; - // It must be an unrelated block. - return OutsideLoop; - } - return use; -} + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; -/// getCriticalExits - It may be necessary to partially break critical edges -/// leaving the loop if an exit block has phi uses of curli. Collect the exit -/// blocks that need special treatment into CriticalExits. -void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, - BlockPtrSet &CriticalExits) { - CriticalExits.clear(); - - // A critical exit block contains a phi def of curli, and has a predecessor - // that is not in the loop nor a loop predecessor. - // For such an exit block, the edges carrying the new variable must be moved - // to a new pre-exit block. - for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end(); - I != E; ++I) { - const MachineBasicBlock *Succ = *I; - SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ); - VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx); - // This exit may not have curli live in at all. No need to split. - if (!SuccVNI) - continue; - // If this is not a PHI def, it is either using a value from before the - // loop, or a value defined inside the loop. Both are safe. - if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx) - continue; - // This exit block does have a PHI. Does it also have a predecessor that is - // not a loop block or loop predecessor? - for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), - PE = Succ->pred_end(); PI != PE; ++PI) { - const MachineBasicBlock *Pred = *PI; - if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred)) - continue; - // This is a critical exit block, and we need to split the exit edge. - CriticalExits.insert(Succ); + // Live segment ends exactly at Stop. Move to the next segment. + if (LVI->end == Stop && ++LVI == LVE) break; - } + + // Pick the next basic block. + if (LVI->start < Stop) + ++MFI; + else + MFI = LIS.getMBBFromIndex(LVI->start); } } -/// canSplitCriticalExits - Return true if it is possible to insert new exit -/// blocks before the blocks in CriticalExits. -bool -SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, - BlockPtrSet &CriticalExits) { - // If we don't allow critical edge splitting, require no critical exits. - if (!AllowSplit) - return CriticalExits.empty(); - - for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end(); - I != E; ++I) { - const MachineBasicBlock *Succ = *I; - // We want to insert a new pre-exit MBB before Succ, and change all the - // in-loop blocks to branch to the pre-exit instead of Succ. - // Check that all the in-loop predecessors can be changed. - for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), - PE = Succ->pred_end(); PI != PE; ++PI) { - const MachineBasicBlock *Pred = *PI; - // The external predecessors won't be altered. 
- if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred)) - continue; - if (!canAnalyzeBranch(Pred)) - return false; - } - - // If Succ's layout predecessor falls through, that too must be analyzable. - // We need to insert the pre-exit block in the gap. - MachineFunction::const_iterator MFI = Succ; - if (MFI == mf_.begin()) - continue; - if (!canAnalyzeBranch(--MFI)) - return false; +void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { + for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { + unsigned count = UsingBlocks.lookup(*I); + OS << " BB#" << (*I)->getNumber(); + if (count) + OS << '(' << count << ')'; } - // No problems found. - return true; } void SplitAnalysis::analyze(const LiveInterval *li) { clear(); - curli_ = li; + CurLI = li; analyzeUses(); } -const MachineLoop *SplitAnalysis::getBestSplitLoop() { - assert(curli_ && "Call analyze() before getBestSplitLoop"); - if (usingLoops_.empty()) - return 0; - - LoopPtrSet Loops, SecondLoops; - LoopBlocks Blocks; - BlockPtrSet CriticalExits; - - // Find first-class and second class candidate loops. - // We prefer to split around loops where curli is used outside the periphery. - for (LoopCountMap::const_iterator I = usingLoops_.begin(), - E = usingLoops_.end(); I != E; ++I) { - const MachineLoop *Loop = I->first; - getLoopBlocks(Loop, Blocks); - - // FIXME: We need an SSA updater to properly handle multiple exit blocks. - if (Blocks.Exits.size() > 1) { - DEBUG(dbgs() << " multiple exits from " << *Loop); - continue; - } - - LoopPtrSet *LPS = 0; - switch(analyzeLoopPeripheralUse(Blocks)) { - case OutsideLoop: - LPS = &Loops; - break; - case MultiPeripheral: - LPS = &SecondLoops; - break; - case ContainedInLoop: - DEBUG(dbgs() << " contained in " << *Loop); - continue; - case SinglePeripheral: - DEBUG(dbgs() << " single peripheral use in " << *Loop); - continue; - } - // Will it be possible to split around this loop? - getCriticalExits(Blocks, CriticalExits); - DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from " - << *Loop); - if (!canSplitCriticalExits(Blocks, CriticalExits)) - continue; - // This is a possible split. - assert(LPS); - LPS->insert(Loop); - } - - DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + " - << SecondLoops.size() << " candidate loops.\n"); - - // If there are no first class loops available, look at second class loops. - if (Loops.empty()) - Loops = SecondLoops; - if (Loops.empty()) - return 0; +//===----------------------------------------------------------------------===// +// LiveIntervalMap +//===----------------------------------------------------------------------===// - // Pick the earliest loop. - // FIXME: Are there other heuristics to consider? - const MachineLoop *Best = 0; - SlotIndex BestIdx; - for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E; - ++I) { - SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader()); - if (!Best || Idx < BestIdx) - Best = *I, BestIdx = Idx; - } - DEBUG(dbgs() << " getBestSplitLoop found " << *Best); - return Best; +// Work around the fact that the std::pair constructors are broken for pointer +// pairs in some implementations. makeVV(x, 0) works. +static inline std::pair<const VNInfo*, VNInfo*> +makeVV(const VNInfo *a, VNInfo *b) { + return std::make_pair(a, b); } -/// getMultiUseBlocks - if curli has more than one use in a basic block, it -/// may be an advantage to split curli for the duration of the block. 
-bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { - // If curli is local to one block, there is no point to splitting it. - if (usingBlocks_.size() <= 1) - return false; - // Add blocks with multiple uses. - for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); - I != E; ++I) - switch (I->second) { - case 0: - case 1: - continue; - case 2: { - // It doesn't pay to split a 2-instr block if it redefines curli. - VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first)); - VNInfo *VN2 = - curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex()); - // live-in and live-out with a different value. - if (VN1 && VN2 && VN1 != VN2) - continue; - } // Fall through. - default: - Blocks.insert(I->first); - } - return !Blocks.empty(); +void LiveIntervalMap::reset(LiveInterval *li) { + LI = li; + Values.clear(); + LiveOutCache.clear(); } -//===----------------------------------------------------------------------===// -// LiveIntervalMap -//===----------------------------------------------------------------------===// +bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const { + ValueMap::const_iterator i = Values.find(ParentVNI); + return i != Values.end() && i->second == 0; +} -// defValue - Introduce a li_ def for ParentVNI that could be later than +// defValue - Introduce a LI def for ParentVNI that could be later than // ParentVNI->def. VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(LI && "call reset first"); assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); - - // Is this a simple 1-1 mapping? Not likely. - if (Idx == ParentVNI->def) - return mapValue(ParentVNI, Idx); - - // This is a complex def. Mark with a NULL in valueMap. - VNInfo *OldVNI = - valueMap_.insert( - ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second; - // The static_cast<VNInfo *> is only needed to work around a bug in an - // old version of the C++0x standard which the following compilers - // implemented and have yet to fix: - // - // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel - // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 - // - // If/When we move to C++0x, this can be replaced by nullptr. - (void)OldVNI; - assert(OldVNI == 0 && "Simple/Complex values mixed"); - - // Should we insert a minimal snippet of VNI LiveRange, or can we count on - // callers to do that? We need it for lookups of complex values. - VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator()); + assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Create a new value. + VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + + // Preserve the PHIDef bit. + if (ParentVNI->isPHIDef() && Idx == ParentVNI->def) + VNI->setIsPHIDef(true); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator,bool> InsP = + Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0)); + + // This is now a complex def. Mark with a NULL in valueMap. + if (!InsP.second) + InsP.first->second = 0; + return VNI; } + // mapValue - Find the mapped value for ParentVNI at Idx. // Potentially create phi-def values. 
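Before the mapValue implementation below, it is worth spelling out the map idiom that defValue above relies on: a single map::insert both finds an existing entry and creates a missing one, and a null mapped pointer is the sentinel for "complex mapping, liveness must be recomputed per block". A standalone sketch of that idiom (ParentVal, ChildVal, and the helper names are hypothetical stand-ins):

#include <map>

struct ParentVal { int id; };
struct ChildVal  { int id; };

using ValueMap = std::map<const ParentVal *, ChildVal *>;

// Record a new definition for PV. The first def may remain a simple 1-1
// mapping; any later def downgrades the entry to "complex" (null).
void recordDef(ValueMap &Values, const ParentVal *PV, ChildVal *CV,
               bool IsSameDefSlot) {
  auto InsP = Values.insert({PV, IsSameDefSlot ? CV : nullptr});
  if (!InsP.second)            // already seen: more than one def now exists
    InsP.first->second = nullptr;
}

bool isComplexMapped(const ValueMap &Values, const ParentVal *PV) {
  auto I = Values.find(PV);
  return I != Values.end() && I->second == nullptr;
}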
-VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { +VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, + bool *simple) { + assert(LI && "call reset first"); assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator,bool> InsP = - valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))); - // The static_cast<VNInfo *> is only needed to work around a bug in an - // old version of the C++0x standard which the following compilers - // implemented and have yet to fix: - // - // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel - // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 - // - // If/When we move to C++0x, this can be replaced by nullptr. + Values.insert(makeVV(ParentVNI, 0)); // This was an unknown value. Create a simple mapping. - if (InsP.second) - return InsP.first->second = li_.createValueCopy(ParentVNI, - lis_.getVNInfoAllocator()); + if (InsP.second) { + if (simple) *simple = true; + return InsP.first->second = LI->createValueCopy(ParentVNI, + LIS.getVNInfoAllocator()); + } + // This was a simple mapped value. - if (InsP.first->second) + if (InsP.first->second) { + if (simple) *simple = true; return InsP.first->second; + } // This is a complex mapped value. There may be multiple defs, and we may need // to create phi-defs. - MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx); + if (simple) *simple = false; + MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); assert(IdxMBB && "No MBB at Idx"); // Is there a def in the same MBB we can extend? @@ -409,157 +270,260 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. - // Perform a depth-first search for predecessor blocks where we know the - // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. - - // Track MBBs where we have created or learned the dominating value. - // This may change during the DFS as we create new phi-defs. - typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap; - MBBValueMap DomValue; - - for (idf_iterator<MachineBasicBlock*> - IDFI = idf_begin(IdxMBB), - IDFE = idf_end(IdxMBB); IDFI != IDFE;) { - MachineBasicBlock *MBB = *IDFI; - SlotIndex End = lis_.getMBBEndIdx(MBB); - - // We are operating on the restricted CFG where ParentVNI is live. - if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) { - IDFI.skipChildren(); - continue; - } - - // Do we have a dominating value in this block? - VNInfo *VNI = extendTo(MBB, End); - if (!VNI) { - ++IDFI; - continue; + // Perform a search for all predecessor blocks where we know the dominating + // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber() + << " at " << Idx << " in " << *LI << '\n'); + + // Blocks where LI should be live-in. + SmallVector<MachineDomTreeNode*, 16> LiveIn; + LiveIn.push_back(MDT[IdxMBB]); + + // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. 
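The comment above describes a backwards reachability search: starting from the block that needs the value, walk predecessor edges with a worklist, using the live-out cache as the visited set, and queue every block that does not provide a value of its own so it can later receive a live-in (possibly phi-def) value. A standalone model of that search, with illustrative types in place of MachineBasicBlock and the real cache:

#include <map>
#include <vector>

struct Block {
  int id;
  std::vector<Block *> preds;
  bool definesValue = false;   // stands in for "extendTo found a live-out def"
};

std::vector<Block *> collectLiveInBlocks(Block *UseBlock) {
  std::map<Block *, bool> LiveOutCache;   // visited set: block -> has value
  std::vector<Block *> LiveIn;            // worklist of blocks needing live-in
  LiveIn.push_back(UseBlock);

  for (unsigned i = 0; i != LiveIn.size(); ++i) {   // BFS over the worklist
    for (Block *Pred : LiveIn[i]->preds) {
      auto InsP = LiveOutCache.insert({Pred, false});
      if (!InsP.second)
        continue;                         // already visited
      if (Pred->definesValue) {
        InsP.first->second = true;        // Pred provides a live-out value
        continue;
      }
      if (Pred != UseBlock)
        LiveIn.push_back(Pred);           // Pred also needs a live-in value
    }
  }
  return LiveIn;
}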
+ for (unsigned i = 0; i != LiveIn.size(); ++i) { + MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + // Is this a known live-out block? + std::pair<LiveOutMap::iterator,bool> LOIP = + LiveOutCache.insert(std::make_pair(Pred, LiveOutPair())); + // Yes, we have been here before. + if (!LOIP.second) { + DEBUG(if (VNInfo *VNI = LOIP.first->second.first) + dbgs() << " known valno #" << VNI->id + << " at BB#" << Pred->getNumber() << '\n'); + continue; + } + + // Does Pred provide a live-out value? + SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot(); + if (VNInfo *VNI = extendTo(Pred, Last)) { + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def); + DEBUG(dbgs() << " found valno #" << VNI->id + << " from BB#" << DefMBB->getNumber() + << " at BB#" << Pred->getNumber() << '\n'); + LiveOutPair &LOP = LOIP.first->second; + LOP.first = VNI; + LOP.second = MDT[DefMBB]; + continue; + } + // No, we need a live-in value for Pred as well + if (Pred != IdxMBB) + LiveIn.push_back(MDT[Pred]); } + } - // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs - // as needed along the way. - for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) { - // Start from MBB's immediate successor. End at IdxMBB. - MachineBasicBlock *Succ = IDFI.getPath(PI-1); - std::pair<MBBValueMap::iterator, bool> InsP = - DomValue.insert(MBBValueMap::value_type(Succ, VNI)); - - // This is the first time we backtrack to Succ. - if (InsP.second) - continue; - - // We reached Succ again with the same VNI. Nothing is going to change. - VNInfo *OVNI = InsP.first->second; - if (OVNI == VNI) - break; + // We may need to add phi-def values to preserve the SSA form. + // This is essentially the same iterative algorithm that SSAUpdater uses, + // except we already have a dominator tree, so we don't have to recompute it. + VNInfo *IdxVNI = 0; + unsigned Changes; + do { + Changes = 0; + DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n"); + // Propagate live-out values down the dominator tree, inserting phi-defs when + // necessary. Since LiveIn was created by a BFS, going backwards makes it more + // likely for us to visit immediate dominators before their children. + for (unsigned i = LiveIn.size(); i; --i) { + MachineDomTreeNode *Node = LiveIn[i-1]; + MachineBasicBlock *MBB = Node->getBlock(); + MachineDomTreeNode *IDom = Node->getIDom(); + LiveOutPair IDomValue; + // We need a live-in value to a block with no immediate dominator? + // This is probably an unreachable block that has survived somehow. + bool needPHI = !IDom; + + // Get the IDom live-out value. + if (!needPHI) { + LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock()); + if (I != LiveOutCache.end()) + IDomValue = I->second; + else + // If IDom is outside our set of live-out blocks, there must be new + // defs, and we need a phi-def here. + needPHI = true; + } - // Succ already has a phi-def. No need to continue. - SlotIndex Start = lis_.getMBBStartIdx(Succ); - if (OVNI->def == Start) - break; + // IDom dominates all of our predecessors, but it may not be the immediate + // dominator. Check if any of them have live-out values that are properly + // dominated by IDom. If so, we need a phi-def here. 
+ if (!needPHI) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + LiveOutPair Value = LiveOutCache[*PI]; + if (!Value.first || Value.first == IDomValue.first) + continue; + // This predecessor is carrying something other than IDomValue. + // It could be because IDomValue hasn't propagated yet, or it could be + // because MBB is in the dominance frontier of that value. + if (MDT.dominates(IDom, Value.second)) { + needPHI = true; + break; + } + } + } - // We have a collision between the old and new VNI at Succ. That means - // neither dominates and we need a new phi-def. - VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - InsP.first->second = VNI; - - // Replace OVNI with VNI in the remaining path. - for (; PI > 1 ; --PI) { - MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2)); - if (I == DomValue.end() || I->second != OVNI) - break; - I->second = VNI; + // Create a phi-def if required. + if (needPHI) { + ++Changes; + SlotIndex Start = LIS.getMBBStartIdx(MBB); + VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + DEBUG(dbgs() << " - BB#" << MBB->getNumber() + << " phi-def #" << VNI->id << " at " << Start << '\n'); + // We no longer need LI to be live-in. + LiveIn.erase(LiveIn.begin()+(i-1)); + // Blocks in LiveIn are either IdxMBB, or have a value live-through. + if (MBB == IdxMBB) + IdxVNI = VNI; + // Check if we need to update live-out info. + LiveOutMap::iterator I = LiveOutCache.find(MBB); + if (I == LiveOutCache.end() || I->second.second == Node) { + // We already have a live-out defined in MBB, so this must be IdxMBB. + assert(MBB == IdxMBB && "Adding phi-def to known live-out"); + LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); + } else { + // This phi-def is also live-out, so color the whole block. + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + I->second = LiveOutPair(VNI, Node); + } + } else if (IDomValue.first) { + // No phi-def here. Remember incoming value for IdxMBB. + if (MBB == IdxMBB) + IdxVNI = IDomValue.first; + // Propagate IDomValue if needed: + // MBB is live-out and doesn't define its own value. + LiveOutMap::iterator I = LiveOutCache.find(MBB); + if (I != LiveOutCache.end() && I->second.second != Node && + I->second.first != IDomValue.first) { + ++Changes; + I->second = IDomValue; + DEBUG(dbgs() << " - BB#" << MBB->getNumber() + << " idom valno #" << IDomValue.first->id + << " from BB#" << IDom->getBlock()->getNumber() << '\n'); + } } } + DEBUG(dbgs() << " - made " << Changes << " changes.\n"); + } while (Changes); - // No need to search the children, we found a dominating value. - IDFI.skipChildren(); - } + assert(IdxVNI && "Didn't find value for Idx"); - // The search should at least find a dominating value for IdxMBB. - assert(!DomValue.empty() && "Couldn't find a reaching definition"); +#ifndef NDEBUG + // Check the LiveOutCache invariants. 
+ for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); + I != E; ++I) { + assert(I->first && "Null MBB entry in cache"); + assert(I->second.first && "Null VNInfo in cache"); + assert(I->second.second && "Null DomTreeNode in cache"); + if (I->second.second->getBlock() == I->first) + continue; + for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), + PE = I->first->pred_end(); PI != PE; ++PI) + assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant"); + } +#endif - // Since we went through the trouble of a full DFS visiting all reaching defs, - // the values in DomValue are now accurate. No more phi-defs are needed for - // these blocks, so we can color the live ranges. + // Since we went through the trouble of a full BFS visiting all reaching defs, + // the values in LiveIn are now accurate. No more phi-defs are needed + // for these blocks, so we can color the live ranges. // This makes the next mapValue call much faster. - VNInfo *IdxVNI = 0; - for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E; - ++I) { - MachineBasicBlock *MBB = I->first; - VNInfo *VNI = I->second; - SlotIndex Start = lis_.getMBBStartIdx(MBB); - if (MBB == IdxMBB) { - // Don't add full liveness to IdxMBB, stop at Idx. - if (Start != Idx) - li_.addRange(LiveRange(Start, Idx, VNI)); - // The caller had better add some liveness to IdxVNI, or it leaks. - IdxVNI = VNI; - } else - li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI)); + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { + MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + VNInfo *VNI = LiveOutCache.lookup(MBB).first; + + // Anything in LiveIn other than IdxMBB is live-through. + // In IdxMBB, we should stop at Idx unless the same value is live-out. + if (MBB == IdxMBB && IdxVNI != VNI) + LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); + else + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); } - assert(IdxVNI && "Didn't find value for Idx"); return IdxVNI; } -// extendTo - Find the last li_ value defined in MBB at or before Idx. The -// parentli_ is assumed to be live at Idx. Extend the live range to Idx. +#ifndef NDEBUG +void LiveIntervalMap::dumpCache() { + for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); + I != E; ++I) { + assert(I->first && "Null MBB entry in cache"); + assert(I->second.first && "Null VNInfo in cache"); + assert(I->second.second && "Null DomTreeNode in cache"); + dbgs() << " cache: BB#" << I->first->getNumber() + << " has valno #" << I->second.first->id << " from BB#" + << I->second.second->getBlock()->getNumber() << ", preds"; + for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), + PE = I->first->pred_end(); PI != PE; ++PI) + dbgs() << " BB#" << (*PI)->getNumber(); + dbgs() << '\n'; + } + dbgs() << " cache: " << LiveOutCache.size() << " entries.\n"; +} +#endif + +// extendTo - Find the last LI value defined in MBB at or before Idx. The +// ParentLI is assumed to be live at Idx. Extend the live range to Idx. // Return the found VNInfo, or NULL. 
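Before the extendTo diff below, here is a standalone model of the lookup it performs: use std::upper_bound to find the last segment starting at or before the query index, reject it if it ended before the block began, and otherwise grow its end so it covers the query point. Segment, the comparison operator, and the index arithmetic are simplified stand-ins for the LiveInterval/SlotIndex versions:

#include <algorithm>
#include <vector>

struct Segment { unsigned start, end; int valno; };   // half-open [start, end)

// Needed by std::upper_bound: "index < segment" compares against the start.
inline bool operator<(unsigned Idx, const Segment &S) { return Idx < S.start; }

// Returns the value number reaching Idx, or -1 if none; extends the segment.
int extendSegmentTo(std::vector<Segment> &Live, unsigned BlockStart,
                    unsigned Idx) {
  auto I = std::upper_bound(Live.begin(), Live.end(), Idx);
  if (I == Live.begin())
    return -1;                 // no segment starts at or before Idx
  --I;
  if (I->end <= BlockStart)
    return -1;                 // segment died before this block
  if (I->end <= Idx)
    I->end = Idx + 1;          // extend liveness up to and including Idx
  return I->valno;
}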
-VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) { - LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx); - if (I == li_.begin()) +VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) { + assert(LI && "call reset first"); + LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx); + if (I == LI->begin()) return 0; --I; - if (I->start < lis_.getMBBStartIdx(MBB)) + if (I->end <= LIS.getMBBStartIdx(MBB)) return 0; - if (I->end < Idx) - I->end = Idx; + if (I->end <= Idx) + I->end = Idx.getNextSlot(); return I->valno; } -// addSimpleRange - Add a simple range from parentli_ to li_. +// addSimpleRange - Add a simple range from ParentLI to LI. // ParentVNI must be live in the [Start;End) interval. void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI) { - VNInfo *VNI = mapValue(ParentVNI, Start); - // A simple mappoing is easy. - if (VNI->def == ParentVNI->def) { - li_.addRange(LiveRange(Start, End, VNI)); + assert(LI && "call reset first"); + bool simple; + VNInfo *VNI = mapValue(ParentVNI, Start, &simple); + // A simple mapping is easy. + if (simple) { + LI->addRange(LiveRange(Start, End, VNI)); return; } // ParentVNI is a complex value. We must map per MBB. - MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start); - MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot()); if (MBB == MBBE) { - li_.addRange(LiveRange(Start, End, VNI)); + LI->addRange(LiveRange(Start, End, VNI)); return; } // First block. - li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI)); + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); // Run sequence of full blocks. for (++MBB; MBB != MBBE; ++MBB) { - Start = lis_.getMBBStartIdx(MBB); - li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), - mapValue(ParentVNI, Start))); + Start = LIS.getMBBStartIdx(MBB); + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), + mapValue(ParentVNI, Start))); } // Final block. - Start = lis_.getMBBStartIdx(MBB); + Start = LIS.getMBBStartIdx(MBB); if (Start != End) - li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); + LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); } -/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_. +/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. /// All needed values whose def is not inside [Start;End) must be defined /// beforehand so mapValue will work. void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { - LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end(); + assert(LI && "call reset first"); + LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end(); LiveInterval::const_iterator I = std::lower_bound(B, E, Start); // Check if --I begins before Start and overlaps. @@ -575,403 +539,374 @@ void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { addSimpleRange(I->start, std::min(End, I->end), I->valno); } + //===----------------------------------------------------------------------===// // Split Editor //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, - SmallVectorImpl<LiveInterval*> &intervals) - : sa_(sa), lis_(lis), vrm_(vrm), - mri_(vrm.getMachineFunction().getRegInfo()), - tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()), - curli_(sa_.getCurLI()), - dupli_(0), openli_(0), - intervals_(intervals), - firstInterval(intervals_.size()) +SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt, + LiveRangeEdit &edit) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(edit), + OpenIdx(0), + RegAssign(Allocator) { - assert(curli_ && "SplitEditor created from empty SplitAnalysis"); - - // Make sure curli_ is assigned a stack slot, so all our intervals get the - // same slot as curli_. - if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT) - vrm_.assignVirt2StackSlot(curli_->reg); - + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit.anyRematerializable(LIS, TII, 0); } -LiveInterval *SplitEditor::createInterval() { - unsigned curli = sa_.getCurLI()->reg; - unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli)); - LiveInterval &Intv = lis_.getOrCreateInterval(Reg); - vrm_.grow(); - vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli)); - return &Intv; +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; } -LiveInterval *SplitEditor::getDupLI() { - if (!dupli_) { - // Create an interval for dupli that is a copy of curli. - dupli_ = createInterval(); - dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator()); +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *CopyMI = 0; + SlotIndex Def; + LiveInterval *LI = Edit.get(RegIdx); + + // Attempt cheap-as-a-copy rematerialization. + LiveRangeEdit::Remat RM(ParentVNI); + if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) { + Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + } else { + // Can't remat, just insert a copy from parent. + CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) + .addReg(Edit.getReg()); + Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); } - return dupli_; -} -VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) { - VNInfo *&VNI = valueMap_[curliVNI]; - if (!VNI) - VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator()); - return VNI; -} + // Define the value in Reg. + VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def); + VNI->setCopy(CopyMI); -/// Insert a COPY instruction curli -> li. Allocate a new value from li -/// defined by the COPY. Note that rewrite() will deal with the curli -/// register, so this function can be used to copy from any interval - openli, -/// curli, or dupli. 
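The new defFromParent above replaces the plain copy insertion that is removed below: before falling back to a COPY from the parent register, it asks whether the value can be rematerialized cheaply at the point where the new interval needs a definition. A standalone sketch of that decision, with hypothetical stand-in types rather than the LiveRangeEdit API:

#include <string>

struct ParentDef {
  std::string instr;        // textual stand-in for the defining instruction
  bool cheapRemat = false;  // true for e.g. constant materializations
};

// Returns the instruction text to emit where the new interval needs a def.
std::string defineFromParent(const ParentDef &Def, const std::string &NewReg,
                             const std::string &ParentReg) {
  if (Def.cheapRemat)
    return Def.instr + " -> " + NewReg;        // re-execute the original def
  return "COPY " + NewReg + ", " + ParentReg;  // otherwise copy the live value
}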
-VNInfo *SplitEditor::insertCopy(LiveInterval &LI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY), - LI.reg).addReg(curli_->reg); - SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator()); + // Add minimal liveness for the new value. + Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + return VNI; } /// Create a new virtual register and live interval. void SplitEditor::openIntv() { - assert(!openli_ && "Previous LI not closed before openIntv"); - openli_ = createInterval(); - intervals_.push_back(openli_); - liveThrough_ = false; -} + assert(!OpenIdx && "Previous LI not closed before openIntv"); -/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is -/// not live before Idx, a COPY is not inserted. -void SplitEditor::enterIntvBefore(SlotIndex Idx) { - assert(openli_ && "openIntv not called before enterIntvBefore"); - - // Copy from curli_ if it is live. - if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) { - MachineInstr *MI = lis_.getInstructionFromIndex(Idx); - assert(MI && "enterIntvBefore called with invalid index"); - VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI); - openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI)); - - // Make sure CurVNI is properly mapped. - VNInfo *&mapVNI = valueMap_[CurVNI]; - // We dont have SSA update yet, so only one entry per value is allowed. - assert(!mapVNI && "enterIntvBefore called more than once for the same value"); - mapVNI = VNI; + // Create the complement as index 0. + if (Edit.empty()) { + Edit.create(MRI, LIS, VRM); + LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); + LIMappers.back().reset(Edit.get(0)); } - DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n'); -} -/// enterIntvAtEnd - Enter openli at the end of MBB. -/// PhiMBB is a successor inside openli where a PHI value is created. -/// Currently, all entries must share the same PhiMBB. -void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) { - assert(openli_ && "openIntv not called before enterIntvAtEnd"); - - SlotIndex EndA = lis_.getMBBEndIdx(&A); - VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex()); - if (!CurVNIA) { - DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#" - << A.getNumber() << ".\n"); - return; - } + // Create the open interval. + OpenIdx = Edit.size(); + Edit.create(MRI, LIS, VRM); + LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); + LIMappers[OpenIdx].reset(Edit.get(OpenIdx)); +} - // Add a phi kill value and live range out of A. - VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator()); - openli_->addRange(LiveRange(VNIA->def, EndA, VNIA)); - - // FIXME: If this is the only entry edge, we don't need the extra PHI value. - // FIXME: If there are multiple entry blocks (so not a loop), we need proper - // SSA update. - - // Now look at the start of B. 
- SlotIndex StartB = lis_.getMBBStartIdx(&B); - SlotIndex EndB = lis_.getMBBEndIdx(&B); - const LiveRange *CurB = curli_->getLiveRangeContaining(StartB); - if (!CurB) { - DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#" - << B.getNumber() << ".\n"); - return; +SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvBefore"); + DEBUG(dbgs() << " enterIntvBefore " << Idx); + Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvBefore called with invalid index"); - VNInfo *VNIB = openli_->getVNInfoAt(StartB); - if (!VNIB) { - // Create a phi value. - VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false, - lis_.getVNInfoAllocator()); - VNIB->setIsPHIDef(true); - VNInfo *&mapVNI = valueMap_[CurB->valno]; - if (mapVNI) { - // Multiple copies - must create PHI value. - abort(); - } else { - // This is the first copy of dupLR. Mark the mapping. - mapVNI = VNIB; - } + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} +SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); + SlotIndex End = LIS.getMBBEndIdx(&MBB); + SlotIndex Last = End.getPrevSlot(); + DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return End; } - - DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n'); + DEBUG(dbgs() << ": valno " << ParentVNI->id); + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, + LIS.getLastSplitPoint(Edit.getParent(), &MBB)); + RegAssign.insert(VNI->def, End, OpenIdx); + DEBUG(dump()); + return VNI->def; } -/// useIntv - indicate that all instructions in MBB should use openli. +/// useIntv - indicate that all instructions in MBB should use OpenLI. void SplitEditor::useIntv(const MachineBasicBlock &MBB) { - useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB)); + useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB)); } void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { - assert(openli_ && "openIntv not called before useIntv"); + assert(OpenIdx && "openIntv not called before useIntv"); + DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} - // Map the curli values from the interval into openli_ - LiveInterval::const_iterator B = curli_->begin(), E = curli_->end(); - LiveInterval::const_iterator I = std::lower_bound(B, E, Start); +SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvAfter"); + DEBUG(dbgs() << " leaveIntvAfter " << Idx); - if (I != B) { - --I; - // I begins before Start, but overlaps. - if (I->end > Start) - openli_->addRange(LiveRange(Start, std::min(End, I->end), - mapValue(I->valno))); - ++I; + // The interval must be live beyond the instruction at Idx. + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); - // The remaining ranges begin after Start. 
- for (;I != E && I->start < End; ++I) - openli_->addRange(LiveRange(I->start, std::min(End, I->end), - mapValue(I->valno))); - DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_ - << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; } -/// leaveIntvAfter - Leave openli after the instruction at Idx. -void SplitEditor::leaveIntvAfter(SlotIndex Idx) { - assert(openli_ && "openIntv not called before leaveIntvAfter"); +SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvBefore"); + DEBUG(dbgs() << " leaveIntvBefore " << Idx); - const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex()); - if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) { - DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n"); - return; + // The interval must be live into the instruction at Idx. + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); - // Was this value of curli live through openli? - if (!openli_->liveAt(CurLR->valno->def)) { - DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n"); - liveThrough_ = true; - return; - } - - // We are going to insert a back copy, so we must have a dupli_. - LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex()); - assert(DupLR && "dupli not live into black, but curli is?"); - - // Insert the COPY instruction. - MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx); - MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(), - tii_.get(TargetOpcode::COPY), dupli_->reg) - .addReg(openli_->reg); - SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx, - mapValue(CurLR->valno))); - DupLR->valno->def = CopyIdx; - DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; } -/// leaveIntvAtTop - Leave the interval at the top of MBB. -/// Currently, only one value can leave the interval. -void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { - assert(openli_ && "openIntv not called before leaveIntvAtTop"); - - SlotIndex Start = lis_.getMBBStartIdx(&MBB); - const LiveRange *CurLR = curli_->getLiveRangeContaining(Start); - - // Is curli even live-in to MBB? - if (!CurLR) { - DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n"); - return; - } - - // Is curli defined by PHI at the beginning of MBB? - bool isPHIDef = CurLR->valno->isPHIDef() && - CurLR->valno->def.getBaseIndex() == Start; +SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); + SlotIndex Start = LIS.getMBBStartIdx(&MBB); + DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); - // If MBB is using a value of curli that was defined outside the openli range, - // we don't want to copy it back here. 
- if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) { - DEBUG(dbgs() << " leaveIntvAtTop at " << Start - << ": using external value\n"); - liveThrough_ = true; - return; + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Start; } - // We are going to insert a back copy, so we must have a dupli_. - LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start); - assert(DupLR && "dupli not live into black, but curli is?"); - - // Insert the COPY instruction. - MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(), - tii_.get(TargetOpcode::COPY), dupli_->reg) - .addReg(openli_->reg); - SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - - // Adjust dupli and openli values. - if (isPHIDef) { - // dupli was already a PHI on entry to MBB. Simply insert an openli PHI, - // and shift the dupli def down to the COPY. - VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, - lis_.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - openli_->addRange(LiveRange(VNI->def, Idx, VNI)); - - dupli_->removeRange(Start, Idx); - DupLR->valno->def = Idx; - DupLR->valno->setIsPHIDef(false); - } else { - // The dupli value was defined somewhere inside the openli range. - DEBUG(dbgs() << " leaveIntvAtTop source value defined at " - << DupLR->valno->def << "\n"); - // FIXME: We may not need a PHI here if all predecessors have the same - // value. - VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, - lis_.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - openli_->addRange(LiveRange(VNI->def, Idx, VNI)); - - // FIXME: What if DupLR->valno is used by multiple exits? SSA Update. - - // closeIntv is going to remove the superfluous live ranges. - DupLR->valno->def = Idx; - DupLR->valno->setIsPHIDef(false); - } + VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, + MBB.SkipPHIsAndLabels(MBB.begin())); + RegAssign.insert(Start, VNI->def, OpenIdx); + DEBUG(dump()); + return VNI->def; +} - DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n'); +void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before overlapIntv"); + assert(Edit.getParent().getVNInfoAt(Start) == + Edit.getParent().getVNInfoAt(End.getPrevSlot()) && + "Parent changes value in extended range"); + assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*"); + assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && + "Range cannot span basic blocks"); + + // Treat this as useIntv() for now. The complement interval will be extended + // as needed by mapValue(). + DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); } /// closeIntv - Indicate that we are done editing the currently open /// LiveInterval, and ranges can be trimmed. void SplitEditor::closeIntv() { - assert(openli_ && "openIntv not called before closeIntv"); - - DEBUG(dbgs() << " closeIntv cleaning up\n"); - DEBUG(dbgs() << " open " << *openli_ << '\n'); - - if (liveThrough_) { - DEBUG(dbgs() << " value live through region, leaving dupli as is.\n"); - } else { - // live out with copies inserted, or killed by region. Either way we need to - // remove the overlapping region from dupli. 
- getDupLI(); - for (LiveInterval::iterator I = openli_->begin(), E = openli_->end(); - I != E; ++I) { - dupli_->removeRange(I->start, I->end); - } - // FIXME: A block branching to the entry block may also branch elsewhere - // curli is live. We need both openli and curli to be live in that case. - DEBUG(dbgs() << " dup2 " << *dupli_ << '\n'); - } - openli_ = 0; - valueMap_.clear(); + assert(OpenIdx && "openIntv not called before closeIntv"); + OpenIdx = 0; } -/// rewrite - after all the new live ranges have been created, rewrite -/// instructions using curli to use the new intervals. -void SplitEditor::rewrite() { - assert(!openli_ && "Previous LI not closed before rewrite"); - const LiveInterval *curli = sa_.getCurLI(); - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg), - RE = mri_.reg_end(); RI != RE;) { +/// rewriteAssigned - Rewrite all uses of Edit.getReg(). +void SplitEditor::rewriteAssigned() { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()), + RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; + // LiveDebugVariables should have handled all DBG_VALUE instructions. if (MI->isDebugValue()) { DEBUG(dbgs() << "Zapping " << *MI); - // FIXME: We can do much better with debug values. MO.setReg(0); continue; } - SlotIndex Idx = lis_.getInstructionIndex(MI); - Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); - LiveInterval *LI = dupli_; - for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { - LiveInterval *testli = intervals_[i]; - if (testli->liveAt(Idx)) { - LI = testli; - break; - } - } - if (LI) { - MO.setReg(LI->reg); - sa_.removeUse(MI); - DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI); - } - } - // dupli_ goes in last, after rewriting. - if (dupli_) { - if (dupli_->empty()) { - DEBUG(dbgs() << " dupli became empty?\n"); - lis_.removeInterval(dupli_->reg); - dupli_ = 0; - } else { - dupli_->RenumberValues(lis_); - intervals_.push_back(dupli_); + // <undef> operands don't really read the register, so just assign them to + // the complement. + if (MO.isUse() && MO.isUndef()) { + MO.setReg(Edit.get(0)->reg); + continue; } + + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + + // Rewrite to the mapped register at Idx. + unsigned RegIdx = RegAssign.lookup(Idx); + MO.setReg(Edit.get(RegIdx)->reg); + DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + << Idx << ':' << RegIdx << '\t' << *MI); + + // Extend liveness to Idx. + const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + LIMappers[RegIdx].mapValue(ParentVNI, Idx); } +} - // Calculate spill weight and allocation hints for new intervals. - VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_); - for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { - LiveInterval &li = *intervals_[i]; - vrai.CalculateRegClass(li.reg); - vrai.CalculateWeightAndHint(li); - DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName() - << ":" << li << '\n'); +/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping. 
+void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs, + const ConnectedVNInfoEqClasses &ConEq) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MO.isUse() && MO.isUndef()) + continue; + // DBG_VALUE instructions should have been eliminated earlier. + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + << Idx << ':'); + const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx); + assert(VNI && "Interval not live at use."); + MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg); + DEBUG(dbgs() << VNI->id << '\t' << *MI); } } +void SplitEditor::finish() { + assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); -//===----------------------------------------------------------------------===// -// Loop Splitting -//===----------------------------------------------------------------------===// + // At this point, the live intervals in Edit contain VNInfos corresponding to + // the inserted copies. -bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { - SplitAnalysis::LoopBlocks Blocks; - sa_.getLoopBlocks(Loop, Blocks); + // Add the original defs from the parent interval. + for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), + E = Edit.getParent().vni_end(); I != E; ++I) { + const VNInfo *ParentVNI = *I; + if (ParentVNI->isUnused()) + continue; + LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)]; + VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def); + LIM.getLI()->addRange(LiveRange(ParentVNI->def, + ParentVNI->def.getNextSlot(), VNI)); + // Mark all values as complex to force liveness computation. + // This should really only be necessary for remat victims, but we are lazy. + LIM.markComplexMapped(ParentVNI); + } - // Break critical edges as needed. - SplitAnalysis::BlockPtrSet CriticalExits; - sa_.getCriticalExits(Blocks, CriticalExits); - assert(CriticalExits.empty() && "Cannot break critical exits yet"); +#ifndef NDEBUG + // Every new interval must have a def by now, otherwise the split is bogus. + for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); +#endif + + // FIXME: Don't recompute the liveness of all values, infer it from the + // overlaps between the parent live interval and RegAssign. + // The mapValue algorithm is only necessary when: + // - The parent value maps to multiple defs, and new phis are needed, or + // - The value has been rematerialized before some uses, and we want to + // minimize the live range so it only reaches the remaining uses. + // All other values have simple liveness that can be computed from RegAssign + // and the parent live interval. + + // Extend live ranges to be live-out for successor PHI values. 
+ for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), + E = Edit.getParent().vni_end(); I != E; ++I) { + const VNInfo *PHIVNI = *I; + if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + LiveIntervalMap &LIM = LIMappers[RegIdx]; + MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); + DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def + << " -> " << RegIdx << '\n'); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); + DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) { + DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n"); + assert(RegAssign.lookup(End) == RegIdx && + "Different register assignment in phi predecessor"); + LIM.mapValue(VNI, End); + } + else + DEBUG(dbgs() << " is not live-out\n"); + } + DEBUG(dbgs() << " " << *LIM.getLI() << '\n'); + } - // Create new live interval for the loop. - openIntv(); + // Rewrite instructions. + rewriteAssigned(); - // Insert copies in the predecessors. - for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(), - E = Blocks.Preds.end(); I != E; ++I) { - MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); - enterIntvAtEnd(MBB, *Loop->getHeader()); - } + // FIXME: Delete defs that were rematted everywhere. - // Switch all loop blocks. - for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(), - E = Blocks.Loop.end(); I != E; ++I) - useIntv(**I); + // Get rid of unused values and set phi-kill flags. + for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + (*I)->RenumberValues(LIS); - // Insert back copies in the exit blocks. - for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(), - E = Blocks.Exits.end(); I != E; ++I) { - MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); - leaveIntvAtTop(MBB); + // Now check if any registers were separated into multiple components. + ConnectedVNInfoEqClasses ConEQ(LIS); + for (unsigned i = 0, e = Edit.size(); i != e; ++i) { + // Don't use iterators, they are invalidated by create() below. + LiveInterval *li = Edit.get(i); + unsigned NumComp = ConEQ.Classify(li); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); + SmallVector<LiveInterval*, 8> dups; + dups.push_back(li); + for (unsigned i = 1; i != NumComp; ++i) + dups.push_back(&Edit.create(MRI, LIS, VRM)); + rewriteComponents(dups, ConEQ); + ConEQ.Distribute(&dups[0]); } - // Done. - closeIntv(); - rewrite(); - return dupli_; + // Calculate spill weight and allocation hints for new intervals. + VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops); + for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){ + LiveInterval &li = **I; + vrai.CalculateRegClass(li.reg); + vrai.CalculateWeightAndHint(li); + DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName() + << ":" << li << '\n'); + } } @@ -979,45 +914,50 @@ bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { // Single Block Splitting //===----------------------------------------------------------------------===// -/// splitSingleBlocks - Split curli into a separate live interval inside each -/// basic block in Blocks. 
Return true if curli has been completely replaced,
-/// false if curli is still intact, and needs to be spilled or split further.
-bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
-  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
-  // Determine the first and last instruction using curli in each block.
-  typedef std::pair<SlotIndex,SlotIndex> IndexPair;
-  typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
-  IndexPairMap MBBRange;
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
-       E = sa_.usingInstrs_.end(); I != E; ++I) {
-    const MachineBasicBlock *MBB = (*I)->getParent();
-    if (!Blocks.count(MBB))
+/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it
+/// may be an advantage to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+  // If CurLI is local to one block, there is no point to splitting it.
+  if (LiveBlocks.size() <= 1)
+    return false;
+  // Add blocks with multiple uses.
+  for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
+    const BlockInfo &BI = LiveBlocks[i];
+    if (!BI.Uses)
       continue;
-    SlotIndex Idx = lis_.getInstructionIndex(*I);
-    DEBUG(dbgs() << "  BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
-    IndexPair &IP = MBBRange[MBB];
-    if (!IP.first.isValid() || Idx < IP.first)
-      IP.first = Idx;
-    if (!IP.second.isValid() || Idx > IP.second)
-      IP.second = Idx;
+    unsigned Instrs = UsingBlocks.lookup(BI.MBB);
+    if (Instrs <= 1)
+      continue;
+    if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+      continue;
+    Blocks.insert(BI.MBB);
   }
+  return !Blocks.empty();
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
 
-  // Create a new interval for each block.
-  for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
-       E = Blocks.end(); I != E; ++I) {
-    IndexPair &IP = MBBRange[*I];
-    DEBUG(dbgs() << "  splitting for BB#" << (*I)->getNumber() << ": ["
-                 << IP.first << ';' << IP.second << ")\n");
-    assert(IP.first.isValid() && IP.second.isValid());
+  for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
+    const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
+    if (!BI.Uses || !Blocks.count(BI.MBB))
+      continue;
     openIntv();
-    enterIntvBefore(IP.first);
-    useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
-    leaveIntvAfter(IP.second);
+    SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
+    if (BI.LastUse < BI.LastSplitPoint) {
+      useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+    } else {
+      // The last use is after the last valid split point.
+      SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
+      useIntv(SegStart, SegStop);
+      overlapIntv(SegStop, BI.LastUse);
+    }
     closeIntv();
   }
-  rewrite();
-  return dupli_;
+  finish();
 }
 
@@ -1025,31 +965,29 @@ bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
 // Sub Block Splitting
 //===----------------------------------------------------------------------===//
 
-/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
 /// and it would pay to subdivide the interval inside that block, return it.
 /// Otherwise return NULL. The returned block can be passed to
 /// SplitEditor::splitInsideBlock.
 const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
   // The interval must be exclusive to one block.
-  if (usingBlocks_.size() != 1)
+  if (UsingBlocks.size() != 1)
     return 0;
   // Don't do this for less than 4 instructions. We want to be sure that
   // splitting actually reduces the instruction count per interval.
-  if (usingInstrs_.size() < 4)
+  if (UsingInstrs.size() < 4)
     return 0;
-  return usingBlocks_.begin()->first;
+  return UsingBlocks.begin()->first;
 }
 
-/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
-/// true if curli has been completely replaced, false if curli is still
-/// intact, and needs to be spilled or split further.
-bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
   SmallVector<SlotIndex, 32> Uses;
-  Uses.reserve(sa_.usingInstrs_.size());
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
-       E = sa_.usingInstrs_.end(); I != E; ++I)
+  Uses.reserve(SA.UsingInstrs.size());
+  for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
+       E = SA.UsingInstrs.end(); I != E; ++I)
     if ((*I)->getParent() == MBB)
-      Uses.push_back(lis_.getInstructionIndex(*I));
+      Uses.push_back(LIS.getInstructionIndex(*I));
   DEBUG(dbgs() << "  splitInsideBlock BB#" << MBB->getNumber() << " for "
                << Uses.size() << " instructions.\n");
   assert(Uses.size() >= 3 && "Need at least 3 instructions");
@@ -1077,21 +1015,16 @@ bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
   // First interval before the gap. Don't create single-instr intervals.
   if (bestPos > 1) {
     openIntv();
-    enterIntvBefore(Uses.front());
-    useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
-    leaveIntvAfter(Uses[bestPos-1]);
+    useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
     closeIntv();
   }
 
   // Second interval after the gap.
if (bestPos < Uses.size()-1) { openIntv(); - enterIntvBefore(Uses[bestPos]); - useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex()); - leaveIntvAfter(Uses.back()); + useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back())); closeIntv(); } - rewrite(); - return dupli_; + finish(); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index ddef7461dc3d..5c34afd1c819 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -1,4 +1,4 @@ -//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,125 +12,132 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/SlotIndexes.h" namespace llvm { +class ConnectedVNInfoEqClasses; class LiveInterval; class LiveIntervals; +class LiveRangeEdit; class MachineInstr; -class MachineLoop; class MachineLoopInfo; class MachineRegisterInfo; class TargetInstrInfo; +class TargetRegisterInfo; class VirtRegMap; class VNInfo; +class raw_ostream; + +/// At some point we should just include MachineDominators.h: +class MachineDominatorTree; +template <class NodeT> class DomTreeNodeBase; +typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; + /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting /// opportunities. class SplitAnalysis { public: - const MachineFunction &mf_; - const LiveIntervals &lis_; - const MachineLoopInfo &loops_; - const TargetInstrInfo &tii_; + const MachineFunction &MF; + const VirtRegMap &VRM; + const LiveIntervals &LIS; + const MachineLoopInfo &Loops; + const TargetInstrInfo &TII; // Instructions using the the current register. typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet; - InstrPtrSet usingInstrs_; + InstrPtrSet UsingInstrs; + + // Sorted slot indexes of using instructions. + SmallVector<SlotIndex, 8> UseSlots; - // The number of instructions using curli in each basic block. + // The number of instructions using CurLI in each basic block. typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap; - BlockCountMap usingBlocks_; + BlockCountMap UsingBlocks; + + /// Additional information about basic blocks where the current variable is + /// live. Such a block will look like one of these templates: + /// + /// 1. | o---x | Internal to block. Variable is only live in this block. + /// 2. |---x | Live-in, kill. + /// 3. | o---| Def, live-out. + /// 4. |---x o---| Live-in, kill, def, live-out. + /// 5. |---o---o---| Live-through with uses or defs. + /// 6. |-----------| Live-through without uses. Transparent. + /// + struct BlockInfo { + MachineBasicBlock *MBB; + SlotIndex FirstUse; ///< First instr using current reg. + SlotIndex LastUse; ///< Last instr using current reg. + SlotIndex Kill; ///< Interval end point inside block. + SlotIndex Def; ///< Interval start point inside block. + /// Last possible point for splitting live ranges. + SlotIndex LastSplitPoint; + bool Uses; ///< Current reg has uses or defs in block. + bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above). + bool LiveIn; ///< Current reg is live in. + bool LiveOut; ///< Current reg is live out. + + // Per-interference pattern scratch data. + bool OverlapEntry; ///< Interference overlaps entering interval. 
+ bool OverlapExit; ///< Interference overlaps exiting interval. + }; - // The number of basic block using curli in each loop. - typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap; - LoopCountMap usingLoops_; + /// Basic blocks where var is live. This array is parallel to + /// SpillConstraints. + SmallVector<BlockInfo, 8> LiveBlocks; private: // Current live interval. - const LiveInterval *curli_; + const LiveInterval *CurLI; - // Sumarize statistics by counting instructions using curli_. + // Sumarize statistics by counting instructions using CurLI. void analyzeUses(); + /// calcLiveBlockInfo - Compute per-block information about CurLI. + void calcLiveBlockInfo(); + /// canAnalyzeBranch - Return true if MBB ends in a branch that can be /// analyzed. bool canAnalyzeBranch(const MachineBasicBlock *MBB); public: - SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis, + SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli); - /// analyze - set curli to the specified interval, and analyze how it may be + /// analyze - set CurLI to the specified interval, and analyze how it may be /// split. void analyze(const LiveInterval *li); - /// removeUse - Update statistics by noting that mi no longer uses curli. - void removeUse(const MachineInstr *mi); - - const LiveInterval *getCurLI() { return curli_; } - /// clear - clear all data structures so SplitAnalysis is ready to analyze a /// new interval. void clear(); - typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet; - - // Sets of basic blocks surrounding a machine loop. - struct LoopBlocks { - BlockPtrSet Loop; // Blocks in the loop. - BlockPtrSet Preds; // Loop predecessor blocks. - BlockPtrSet Exits; // Loop exit blocks. - - void clear() { - Loop.clear(); - Preds.clear(); - Exits.clear(); - } - }; - - // Calculate the block sets surrounding the loop. - void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks); - - /// LoopPeripheralUse - how is a variable used in and around a loop? - /// Peripheral blocks are the loop predecessors and exit blocks. - enum LoopPeripheralUse { - ContainedInLoop, // All uses are inside the loop. - SinglePeripheral, // At most one instruction per peripheral block. - MultiPeripheral, // Multiple instructions in some peripheral blocks. - OutsideLoop // Uses outside loop periphery. - }; - - /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in - /// and around the Loop. - LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&); + /// getParent - Return the last analyzed interval. + const LiveInterval &getParent() const { return *CurLI; } - /// getCriticalExits - It may be necessary to partially break critical edges - /// leaving the loop if an exit block has phi uses of curli. Collect the exit - /// blocks that need special treatment into CriticalExits. - void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits); + /// hasUses - Return true if MBB has any uses of CurLI. + bool hasUses(const MachineBasicBlock *MBB) const { + return UsingBlocks.lookup(MBB); + } - /// canSplitCriticalExits - Return true if it is possible to insert new exit - /// blocks before the blocks in CriticalExits. - bool canSplitCriticalExits(const LoopBlocks &Blocks, - BlockPtrSet &CriticalExits); + typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - /// getBestSplitLoop - Return the loop where curli may best be split to a - /// separate register, or NULL. 
- const MachineLoop *getBestSplitLoop(); + // Print a set of blocks with use counts. + void print(const BlockPtrSet&, raw_ostream&) const; /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from - /// having curli split to a new live interval. Return true if Blocks can be + /// having CurLI split to a new live interval. Return true if Blocks can be /// passed to SplitEditor::splitSingleBlocks. bool getMultiUseBlocks(BlockPtrSet &Blocks); - /// getBlockForInsideSplit - If curli is contained inside a single basic block, - /// and it wou pay to subdivide the interval inside that block, return it. - /// Otherwise return NULL. The returned block can be passed to + /// getBlockForInsideSplit - If CurLI is contained inside a single basic + /// block, and it would pay to subdivide the interval inside that block, + /// return it. Otherwise return NULL. The returned block can be passed to /// SplitEditor::splitInsideBlock. const MachineBasicBlock *getBlockForInsideSplit(); }; @@ -140,58 +147,102 @@ public: /// interval that is a subset. Insert phi-def values as needed. This class is /// used by SplitEditor to create new smaller LiveIntervals. /// -/// parentli_ is the larger interval, li_ is the subset interval. Every value -/// in li_ corresponds to exactly one value in parentli_, and the live range -/// of the value is contained within the live range of the parentli_ value. -/// Values in parentli_ may map to any number of openli_ values, including 0. +/// ParentLI is the larger interval, LI is the subset interval. Every value +/// in LI corresponds to exactly one value in ParentLI, and the live range +/// of the value is contained within the live range of the ParentLI value. +/// Values in ParentLI may map to any number of OpenLI values, including 0. class LiveIntervalMap { - LiveIntervals &lis_; + LiveIntervals &LIS; + MachineDominatorTree &MDT; // The parent interval is never changed. - const LiveInterval &parentli_; + const LiveInterval &ParentLI; - // The child interval's values are fully contained inside parentli_ values. - LiveInterval &li_; + // The child interval's values are fully contained inside ParentLI values. + LiveInterval *LI; typedef DenseMap<const VNInfo*, VNInfo*> ValueMap; - // Map parentli_ values to simple values in li_ that are defined at the same - // SlotIndex, or NULL for parentli_ values that have complex li_ defs. + // Map ParentLI values to simple values in LI that are defined at the same + // SlotIndex, or NULL for ParentLI values that have complex LI defs. // Note there is a difference between values mapping to NULL (complex), and // values not present (unknown/unmapped). - ValueMap valueMap_; - - // extendTo - Find the last li_ value defined in MBB at or before Idx. The - // parentli_ is assumed to be live at Idx. Extend the live range to Idx. - // Return the found VNInfo, or NULL. - VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx); - - // addSimpleRange - Add a simple range from parentli_ to li_. - // ParentVNI must be live in the [Start;End) interval. - void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI); + ValueMap Values; + + typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; + typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap; + + // LiveOutCache - Map each basic block where LI is live out to the live-out + // value and its defining block. One of these conditions shall be true: + // + // 1. !LiveOutCache.count(MBB) + // 2. LiveOutCache[MBB].second.getNode() == MBB + // 3. 
forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] + // + // This is only a cache, the values can be computed as: + // + // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB)) + // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] + // + // The cache is also used as a visiteed set by mapValue(). + LiveOutMap LiveOutCache; + + // Dump the live-out cache to dbgs(). + void dumpCache(); public: LiveIntervalMap(LiveIntervals &lis, - const LiveInterval &parentli, - LiveInterval &li) - : lis_(lis), parentli_(parentli), li_(li) {} + MachineDominatorTree &mdt, + const LiveInterval &parentli) + : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {} + + /// reset - clear all data structures and start a new live interval. + void reset(LiveInterval *); + + /// getLI - return the current live interval. + LiveInterval *getLI() const { return LI; } - /// defValue - define a value in li_ from the parentli_ value VNI and Idx. + /// defValue - define a value in LI from the ParentLI value VNI and Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within - /// ParentVNI's live range in parentli_. - /// Return the new li_ value. + /// ParentVNI's live range in ParentLI. + /// Return the new LI value. VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx); - /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is + /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is /// assumed that ParentVNI is live at Idx. /// If ParentVNI has not been defined by defValue, it is assumed that /// ParentVNI->def dominates Idx. /// If ParentVNI has been defined by defValue one or more times, a value that /// dominates Idx will be returned. This may require creating extra phi-def - /// values and adding live ranges to li_. - VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx); + /// values and adding live ranges to LI. + /// If simple is not NULL, *simple will indicate if ParentVNI is a simply + /// mapped value. + VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0); + + // extendTo - Find the last LI value defined in MBB at or before Idx. The + // parentli is assumed to be live at Idx. Extend the live range to include + // Idx. Return the found VNInfo, or NULL. + VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx); + + /// isMapped - Return true is ParentVNI is a known mapped value. It may be a + /// simple 1-1 mapping or a complex mapping to later defs. + bool isMapped(const VNInfo *ParentVNI) const { + return Values.count(ParentVNI); + } + + /// isComplexMapped - Return true if ParentVNI has received new definitions + /// with defValue. + bool isComplexMapped(const VNInfo *ParentVNI) const; + + /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the + /// number of definitions. + void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; } + + // addSimpleRange - Add a simple range from ParentLI to LI. + // ParentVNI must be live in the [Start;End) interval. + void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI); - /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_. + /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. /// All needed values whose def is not inside [Start;End) must be defined /// beforehand so mapValue will work. 
void addRange(SlotIndex Start, SlotIndex End); @@ -207,115 +258,129 @@ public: /// - Mark the ranges where the new interval is used with useIntv* /// - Mark the places where the interval is exited with exitIntv*. /// - Finish the current interval with closeIntv and repeat from 2. -/// - Rewrite instructions with rewrite(). +/// - Rewrite instructions with finish(). /// class SplitEditor { - SplitAnalysis &sa_; - LiveIntervals &lis_; - VirtRegMap &vrm_; - MachineRegisterInfo &mri_; - const TargetInstrInfo &tii_; - - /// curli_ - The immutable interval we are currently splitting. - const LiveInterval *const curli_; - - /// dupli_ - Created as a copy of curli_, ranges are carved out as new - /// intervals get added through openIntv / closeIntv. This is used to avoid - /// editing curli_. - LiveInterval *dupli_; - - /// Currently open LiveInterval. - LiveInterval *openli_; - - /// createInterval - Create a new virtual register and LiveInterval with same - /// register class and spill slot as curli. - LiveInterval *createInterval(); - - /// getDupLI - Ensure dupli is created and return it. - LiveInterval *getDupLI(); - - /// valueMap_ - Map values in cupli to values in openli. These are direct 1-1 - /// mappings, and do not include values created by inserted copies. - DenseMap<const VNInfo*, VNInfo*> valueMap_; - - /// mapValue - Return the openIntv value that corresponds to the given curli - /// value. - VNInfo *mapValue(const VNInfo *curliVNI); - - /// A dupli value is live through openIntv. - bool liveThrough_; - - /// All the new intervals created for this split are added to intervals_. - SmallVectorImpl<LiveInterval*> &intervals_; - - /// The index into intervals_ of the first interval we added. There may be - /// others from before we got it. - unsigned firstInterval; - - /// Insert a COPY instruction curli -> li. Allocate a new value from li - /// defined by the COPY - VNInfo *insertCopy(LiveInterval &LI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); + SplitAnalysis &SA; + LiveIntervals &LIS; + VirtRegMap &VRM; + MachineRegisterInfo &MRI; + MachineDominatorTree &MDT; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + + /// Edit - The current parent register and new intervals created. + LiveRangeEdit &Edit; + + /// Index into Edit of the currently open interval. + /// The index 0 is used for the complement, so the first interval started by + /// openIntv will be 1. + unsigned OpenIdx; + + typedef IntervalMap<SlotIndex, unsigned> RegAssignMap; + + /// Allocator for the interval map. This will eventually be shared with + /// SlotIndexes and LiveIntervals. + RegAssignMap::Allocator Allocator; + + /// RegAssign - Map of the assigned register indexes. + /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at + /// Idx. + RegAssignMap RegAssign; + + /// LIMappers - One LiveIntervalMap or each interval in Edit. + SmallVector<LiveIntervalMap, 4> LIMappers; + + /// defFromParent - Define Reg from ParentVNI at UseIdx using either + /// rematerialization or a COPY from parent. Return the new value. + VNInfo *defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I); + + /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers. + void rewriteAssigned(); + + /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq + /// classes in ConEQ. + /// This must be done when Intvs[0] is styill live at all uses, before calling + /// ConEq.Distribute(). 
+ void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs, + const ConnectedVNInfoEqClasses &ConEq); public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - SmallVectorImpl<LiveInterval*> &newIntervals); + MachineDominatorTree&, LiveRangeEdit&); /// getAnalysis - Get the corresponding analysis. - SplitAnalysis &getAnalysis() { return sa_; } + SplitAnalysis &getAnalysis() { return SA; } /// Create a new virtual register and live interval. void openIntv(); - /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is - /// not live before Idx, a COPY is not inserted. - void enterIntvBefore(SlotIndex Idx); + /// enterIntvBefore - Enter the open interval before the instruction at Idx. + /// If the parent interval is not live before Idx, a COPY is not inserted. + /// Return the beginning of the new live range. + SlotIndex enterIntvBefore(SlotIndex Idx); - /// enterIntvAtEnd - Enter openli at the end of MBB. - /// PhiMBB is a successor inside openli where a PHI value is created. - /// Currently, all entries must share the same PhiMBB. - void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB); + /// enterIntvAtEnd - Enter the open interval at the end of MBB. + /// Use the open interval from he inserted copy to the MBB end. + /// Return the beginning of the new live range. + SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB); - /// useIntv - indicate that all instructions in MBB should use openli. + /// useIntv - indicate that all instructions in MBB should use OpenLI. void useIntv(const MachineBasicBlock &MBB); - /// useIntv - indicate that all instructions in range should use openli. + /// useIntv - indicate that all instructions in range should use OpenLI. void useIntv(SlotIndex Start, SlotIndex End); - /// leaveIntvAfter - Leave openli after the instruction at Idx. - void leaveIntvAfter(SlotIndex Idx); + /// leaveIntvAfter - Leave the open interval after the instruction at Idx. + /// Return the end of the live range. + SlotIndex leaveIntvAfter(SlotIndex Idx); + + /// leaveIntvBefore - Leave the open interval before the instruction at Idx. + /// Return the end of the live range. + SlotIndex leaveIntvBefore(SlotIndex Idx); /// leaveIntvAtTop - Leave the interval at the top of MBB. - /// Currently, only one value can leave the interval. - void leaveIntvAtTop(MachineBasicBlock &MBB); + /// Add liveness from the MBB top to the copy. + /// Return the end of the live range. + SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB); + + /// overlapIntv - Indicate that all instructions in range should use the open + /// interval, but also let the complement interval be live. + /// + /// This doubles the register pressure, but is sometimes required to deal with + /// register uses after the last valid split point. + /// + /// The Start index should be a return value from a leaveIntv* call, and End + /// should be in the same basic block. The parent interval must have the same + /// value across the range. + /// + void overlapIntv(SlotIndex Start, SlotIndex End); /// closeIntv - Indicate that we are done editing the currently open /// LiveInterval, and ranges can be trimmed. void closeIntv(); - /// rewrite - after all the new live ranges have been created, rewrite - /// instructions using curli to use the new intervals. 
- void rewrite(); + /// finish - after all the new live ranges have been created, compute the + /// remaining live range, and rewrite instructions to use the new registers. + void finish(); - // ===--- High level methods ---=== + /// dump - print the current interval maping to dbgs(). + void dump() const; - /// splitAroundLoop - Split curli into a separate live interval inside - /// the loop. Return true if curli has been completely replaced, false if - /// curli is still intact, and needs to be spilled or split further. - bool splitAroundLoop(const MachineLoop*); + // ===--- High level methods ---=== - /// splitSingleBlocks - Split curli into a separate live interval inside each - /// basic block in Blocks. Return true if curli has been completely replaced, - /// false if curli is still intact, and needs to be spilled or split further. - bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + /// splitSingleBlocks - Split CurLI into a separate live interval inside each + /// basic block in Blocks. + void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); - /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return - /// true if curli has been completely replaced, false if curli is still - /// intact, and needs to be spilled or split further. - bool splitInsideBlock(const MachineBasicBlock *); + /// splitInsideBlock - Split CurLI into multiple intervals inside MBB. + void splitInsideBlock(const MachineBasicBlock *); }; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp index 38f3b1f4d35e..08aee82b8c5c 100644 --- a/lib/CodeGen/Splitter.cpp +++ b/lib/CodeGen/Splitter.cpp @@ -29,8 +29,14 @@ using namespace llvm; char LoopSplitter::ID = 0; -INITIALIZE_PASS(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false); +INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting", + "Split virtual regists across loop boundaries.", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(LoopSplitter, "loop-splitting", + "Split virtual regists across loop boundaries.", false, false) namespace llvm { @@ -140,7 +146,6 @@ namespace llvm { VNInfo *newVal = getNewVNI(preHeaderRange->valno); newVal->def = copyDefIdx; newVal->setCopy(copy); - newVal->setIsDefAccurate(true); li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); getNewLI()->addRange(LiveRange(copyDefIdx, @@ -174,13 +179,13 @@ namespace llvm { // Blow away output range definition. 
outRange->valno->def = ls.lis->getInvalidIndex(); - outRange->valno->setIsDefAccurate(false); li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); + SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); + assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); VNInfo *newVal = - getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock), - true), - 0, false, ls.lis->getVNInfoAllocator()); + getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx, newVal)); @@ -514,8 +519,10 @@ namespace llvm { if (!insertRange) continue; - VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader), - 0, false, lis->getVNInfoAllocator()); + SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); + assert(lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), lis->getMBBEndIdx(preHeader), newVal)); @@ -612,8 +619,11 @@ namespace llvm { lis->getMBBEndIdx(splitBlock), true); } } else if (intersects) { - VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock), - 0, false, lis->getVNInfoAllocator()); + SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); + assert(lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = li.getNextValue(newDefIdx, 0, + lis->getVNInfoAllocator()); li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), lis->getMBBEndIdx(splitBlock), newVal)); diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h index a726a7b834fb..9fb1b8b30139 100644 --- a/lib/CodeGen/Splitter.h +++ b/lib/CodeGen/Splitter.h @@ -36,7 +36,9 @@ namespace llvm { public: static char ID; - LoopSplitter() : MachineFunctionPass(ID) {} + LoopSplitter() : MachineFunctionPass(ID) { + initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &au) const; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 9f51778da756..fcaee4208ba3 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Attributes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -45,6 +46,8 @@ namespace { Function *F; Module *M; + DominatorTree* DT; + /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. /// @@ -62,9 +65,17 @@ namespace { bool RequiresStackProtector() const; public: static char ID; // Pass identification, replacement for typeid. 
- StackProtector() : FunctionPass(ID), TLI(0) {} + StackProtector() : FunctionPass(ID), TLI(0) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } StackProtector(const TargetLowering *tli) - : FunctionPass(ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + } virtual bool runOnFunction(Function &Fn); }; @@ -72,7 +83,7 @@ namespace { char StackProtector::ID = 0; INITIALIZE_PASS(StackProtector, "stack-protector", - "Insert stack protectors", false, false); + "Insert stack protectors", false, false) FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { return new StackProtector(tli); @@ -81,6 +92,7 @@ FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); + DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; @@ -135,6 +147,7 @@ bool StackProtector::RequiresStackProtector() const { /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = 0; // The basic block to jump to if check fails. + BasicBlock *FailBBDom = 0; // FailBB's dominator. AllocaInst *AI = 0; // Place on stack that stores the stack guard. Value *StackGuardVar = 0; // The stack guard variable. @@ -178,6 +191,8 @@ bool StackProtector::InsertStackProtectors() { // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); + if (DT) + FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0; } // For each block with a return instruction, convert this: @@ -204,6 +219,10 @@ bool StackProtector::InsertStackProtectors() { // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + if (DT) { + DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0); + FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB); + } // Remove default branch instruction to the new BB. BB->getTerminator()->eraseFromParent(); @@ -223,6 +242,9 @@ bool StackProtector::InsertStackProtectors() { // statements in the function. 
if (!FailBB) return false; + if (DT) + DT->addNewBlock(FailBB, FailBBDom); + return true; } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 8d57ae95dde2..01f5b5627f4f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -95,9 +95,13 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { + initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); + } StackSlotColoring(bool RegColor) : - MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) { + initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -145,8 +149,14 @@ namespace { char StackSlotColoring::ID = 0; -INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring", - "Stack Slot Coloring", false, false); +INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false) FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { return new StackSlotColoring(RegColor); @@ -208,7 +218,7 @@ void StackSlotColoring::InitializeSlots() { for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); - int FI = li.getStackSlotIndex(); + int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; SSIntervals.push_back(&li); @@ -251,7 +261,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = li->getStackSlotIndex(); + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); if (!UsedColors[SS] || li->weight < 20) // If the weight is < 20, i.e. two references in a loop with depth 1, // don't bother with it. @@ -340,7 +350,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); - int FI = li->getStackSlotIndex(); + int FI = TargetRegisterInfo::stackSlot2Index(li->reg); DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. If there are multiple @@ -369,7 +379,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = li->getStackSlotIndex(); + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; @@ -382,7 +392,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = li->getStackSlotIndex(); + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); li->weight = SlotWeights[SS]; } // Sort them by new weight. 
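Editor's note: the StackSlotColoring hunks above replace li.getStackSlotIndex() with TargetRegisterInfo::stackSlot2Index(li.reg), that is, the frame index of a spill-slot interval is now recovered from the interval's register field. The following is a minimal sketch of that encoding idea only; the names in namespace toy and the boundary constant are made up for illustration and are not LLVM's actual implementation.

#include <cassert>

namespace toy {
// Hypothetical reserved range: values at or above this boundary are treated
// as frame indexes rather than virtual or physical register numbers.
const unsigned FirstStackSlot = 1u << 30;

inline bool isStackSlot(unsigned Reg) { return Reg >= FirstStackSlot; }

inline unsigned index2StackSlot(int FI) {
  assert(FI >= 0 && "sketch only handles non-negative frame indexes");
  return FirstStackSlot + unsigned(FI);
}

inline int stackSlot2Index(unsigned Reg) {
  assert(isStackSlot(Reg) && "not a stack slot");
  return int(Reg - FirstStackSlot);
}
} // namespace toy

// Round trip: toy::stackSlot2Index(toy::index2StackSlot(5)) == 5.

Packing the frame index into the same unsigned field that normally holds a register is what lets LiveStacks reuse the LiveInterval machinery for stack slots.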
@@ -636,7 +646,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } else { SmallVector<MachineInstr*, 4> NewMIs; bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - Success = Success; // Silence compiler warning. + (void)Success; // Silence compiler warning. assert(Success && "Failed to unfold!"); MachineInstr *NewMI = NewMIs[0]; MBB->insert(MI, NewMI); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 894dbfa28bac..ec7829ec39fe 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -1,4 +1,4 @@ -//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===// +//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// // // The LLVM Compiler Infrastructure // @@ -7,1039 +7,823 @@ // //===----------------------------------------------------------------------===// // -// This pass eliminates machine instruction PHI nodes by inserting copy -// instructions, using an intelligent copy-folding technique based on -// dominator information. This is technique is derived from: +// This pass eliminates PHI instructions by aggressively coalescing the copies +// that would be inserted by a naive algorithm and only inserting the copies +// that are necessary. The coalescing technique initially assumes that all +// registers appearing in a PHI instruction do not interfere. It then eliminates +// proven interferences, using dominators to only perform a linear number of +// interference tests instead of the quadratic number of interference tests +// that this would naively require. This is a technique derived from: // // Budimlic, et al. Fast copy coalescing and live-range identification. // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language // Design and Implementation (Berlin, Germany, June 17 - 19, 2002). // PLDI '02. ACM, New York, NY, 25-32. -// DOI= http://doi.acm.org/10.1145/512529.512534 +// +// The original implementation constructs a data structure they call a dominance +// forest for this purpose. The dominance forest was shown to be unnecessary, +// as it is possible to emulate the creation and traversal of a dominance forest +// by directly using the dominator tree, rather than actually constructing the +// dominance forest. This technique is explained in: +// +// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code +// Quality and Efficiency, +// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code +// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). +// CGO '09. IEEE, Washington, DC, 114-125. +// +// Careful implementation allows for all of the dominator forest interference +// checks to be performed at once in a single depth-first traversal of the +// dominator tree, which is what is implemented here. 
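Editor's note: the rewritten file comment above describes testing interferences within a congruence class by walking the dominator tree once and comparing each definition only against its nearest dominating definition from the same class. The sketch below illustrates that single-traversal idea with toy data structures and a caller-supplied liveness oracle; it is a simplification under assumed names (DomNode, findInterferences, LiveAt), not the pass's actual types.

#include <functional>
#include <map>
#include <utility>
#include <vector>

struct DomNode {
  int Block;                      // basic block id
  std::vector<int> Defs;          // registers defined here, in program order
  std::vector<DomNode*> Children; // dominator-tree children
};

// ClassOf maps a register to its congruence class; LiveAt(Reg, Block) is a
// liveness oracle. Returns pairs of same-class registers that interfere.
std::vector<std::pair<int, int>> findInterferences(
    DomNode *Root, const std::map<int, int> &ClassOf,
    const std::function<bool(int, int)> &LiveAt) {
  std::vector<std::pair<int, int>> Conflicts;
  // For each class, the stack of definitions that dominate the current node.
  std::map<int, std::vector<int>> DomStack;

  std::function<void(DomNode*)> Visit = [&](DomNode *N) {
    std::map<int, unsigned> SavedDepth; // stack depth before this node's pushes
    for (int Reg : N->Defs) {
      int C = ClassOf.at(Reg);
      std::vector<int> &Stack = DomStack[C];
      if (!SavedDepth.count(C))
        SavedDepth[C] = Stack.size();
      // Only the nearest dominating same-class definition needs checking; if
      // it is still live here, the two registers interfere.
      if (!Stack.empty() && LiveAt(Stack.back(), N->Block))
        Conflicts.push_back({Stack.back(), Reg});
      Stack.push_back(Reg);
    }
    for (DomNode *Child : N->Children)
      Visit(Child);
    for (const auto &Entry : SavedDepth) // pop definitions local to this subtree
      DomStack[Entry.first].resize(Entry.second);
  };
  Visit(Root);
  return Conflicts;
}

Because each definition is compared against one stack top instead of every other class member, the number of liveness queries stays linear in the number of definitions, which is the point the comment above is making.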
// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "strongphielim" +#include "PHIEliminationUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace llvm; namespace { - struct StrongPHIElimination : public MachineFunctionPass { + class StrongPHIElimination : public MachineFunctionPass { + public: static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(ID) {} - - // Waiting stores, for each MBB, the set of copies that need to - // be inserted into that MBB - DenseMap<MachineBasicBlock*, - std::multimap<unsigned, unsigned> > Waiting; - - // Stacks holds the renaming stack for each register - std::map<unsigned, std::vector<unsigned> > Stacks; - - // Registers in UsedByAnother are PHI nodes that are themselves - // used as operands to another PHI node - std::set<unsigned> UsedByAnother; - - // RenameSets are the is a map from a PHI-defined register - // to the input registers to be coalesced along with the - // predecessor block for those input registers. - std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets; - - // PhiValueNumber holds the ID numbers of the VNs for each phi that we're - // eliminating, indexed by the register defined by that phi. - std::map<unsigned, unsigned> PhiValueNumber; - - // Store the DFS-in number of each block - DenseMap<MachineBasicBlock*, unsigned> preorder; - - // Store the DFS-out number of each block - DenseMap<MachineBasicBlock*, unsigned> maxpreorder; - - bool runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - - // TODO: Actually make this true. - AU.addPreserved<LiveIntervals>(); - AU.addPreserved<RegisterCoalescer>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual void releaseMemory() { - preorder.clear(); - maxpreorder.clear(); - - Waiting.clear(); - Stacks.clear(); - UsedByAnother.clear(); - RenameSets.clear(); + StrongPHIElimination() : MachineFunctionPass(ID) { + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + private: - - /// DomForestNode - Represents a node in the "dominator forest". This is - /// a forest in which the nodes represent registers and the edges - /// represent a dominance relation in the block defining those registers. 
- struct DomForestNode { - private: - // Store references to our children - std::vector<DomForestNode*> children; - // The register we represent - unsigned reg; - - // Add another node as our child - void addChild(DomForestNode* DFN) { children.push_back(DFN); } - - public: - typedef std::vector<DomForestNode*>::iterator iterator; - - // Create a DomForestNode by providing the register it represents, and - // the node to be its parent. The virtual root node has register 0 - // and a null parent. - DomForestNode(unsigned r, DomForestNode* parent) : reg(r) { - if (parent) - parent->addChild(this); - } - - ~DomForestNode() { - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; - } - - /// getReg - Return the regiser that this node represents - inline unsigned getReg() { return reg; } - - // Provide iterator access to our children - inline DomForestNode::iterator begin() { return children.begin(); } - inline DomForestNode::iterator end() { return children.end(); } + /// This struct represents a single node in the union-find data structure + /// representing the variable congruence classes. There is one difference + /// from a normal union-find data structure. We steal two bits from the parent + /// pointer . One of these bits is used to represent whether the register + /// itself has been isolated, and the other is used to represent whether the + /// PHI with that register as its destination has been isolated. + /// + /// Note that this leads to the strange situation where the leader of a + /// congruence class may no longer logically be a member, due to being + /// isolated. + struct Node { + enum Flags { + kRegisterIsolatedFlag = 1, + kPHIIsolatedFlag = 2 + }; + Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } + + Node *getLeader(); + + PointerIntPair<Node*, 2> parent; + unsigned value; + unsigned rank; }; - - void computeDFS(MachineFunction& MF); - void processBlock(MachineBasicBlock* MBB); - - std::vector<DomForestNode*> computeDomForest( - std::map<unsigned, MachineBasicBlock*>& instrs, - MachineRegisterInfo& MRI); - void processPHIUnion(MachineInstr* Inst, - std::map<unsigned, MachineBasicBlock*>& PHIUnion, - std::vector<StrongPHIElimination::DomForestNode*>& DF, - std::vector<std::pair<unsigned, unsigned> >& locals); - void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed); - void InsertCopies(MachineDomTreeNode* MBB, - SmallPtrSet<MachineBasicBlock*, 16>& v); - bool mergeLiveIntervals(unsigned primary, unsigned secondary); - }; -} -char StrongPHIElimination::ID = 0; -INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false); + /// Add a register in a new congruence class containing only itself. + void addReg(unsigned); -char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; + /// Join the congruence classes of two registers. This function is biased + /// towards the left argument, i.e. after + /// + /// addReg(r2); + /// unionRegs(r1, r2); + /// + /// the leader of the unioned congruence class is the same as the leader of + /// r1's congruence class prior to the union. This is actually relied upon + /// in the copy insertion code. + void unionRegs(unsigned, unsigned); -/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree -/// of the given MachineFunction. These numbers are then used in other parts -/// of the PHI elimination process. 
-void StrongPHIElimination::computeDFS(MachineFunction& MF) { - SmallPtrSet<MachineDomTreeNode*, 8> frontier; - SmallPtrSet<MachineDomTreeNode*, 8> visited; - - unsigned time = 0; - - MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>(); - - MachineDomTreeNode* node = DT.getRootNode(); - - std::vector<MachineDomTreeNode*> worklist; - worklist.push_back(node); - - while (!worklist.empty()) { - MachineDomTreeNode* currNode = worklist.back(); - - if (!frontier.count(currNode)) { - frontier.insert(currNode); - ++time; - preorder.insert(std::make_pair(currNode->getBlock(), time)); - } - - bool inserted = false; - for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end(); - I != E; ++I) - if (!frontier.count(*I) && !visited.count(*I)) { - worklist.push_back(*I); - inserted = true; - break; - } - - if (!inserted) { - frontier.erase(currNode); - visited.insert(currNode); - maxpreorder.insert(std::make_pair(currNode->getBlock(), time)); - - worklist.pop_back(); + /// Get the color of a register. The color is 0 if the register has been + /// isolated. + unsigned getRegColor(unsigned); + + // Isolate a register. + void isolateReg(unsigned); + + /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been + /// isolated. Otherwise, it is the original color of its destination and + /// all of its operands (before they were isolated, if they were). + unsigned getPHIColor(MachineInstr*); + + /// Isolate a PHI. + void isolatePHI(MachineInstr*); + + /// Traverses a basic block, splitting any interferences found between + /// registers in the same congruence class. It takes two DenseMaps as + /// arguments that it also updates: CurrentDominatingParent, which maps + /// a color to the register in that congruence class whose definition was + /// most recently seen, and ImmediateDominatingParent, which maps a register + /// to the register in the same congruence class that most immediately + /// dominates it. + /// + /// This function assumes that it is being called in a depth-first traversal + /// of the dominator tree. + void SplitInterferencesForBasicBlock( + MachineBasicBlock&, + DenseMap<unsigned, unsigned> &CurrentDominatingParent, + DenseMap<unsigned, unsigned> &ImmediateDominatingParent); + + // Lowers a PHI instruction, inserting copies of the source and destination + // registers as necessary. + void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); + + // Merges the live interval of Reg into NewReg and renames Reg to NewReg + // everywhere that Reg appears. Requires Reg and NewReg to have non- + // overlapping lifetimes. + void MergeLIsAndRename(unsigned Reg, unsigned NewReg); + + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + MachineDominatorTree *DT; + LiveIntervals *LI; + + BumpPtrAllocator Allocator; + + DenseMap<unsigned, Node*> RegNodeMap; + + // Maps a basic block to a list of its defs of registers that appear as PHI + // sources. + DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; + + // Maps a color to a pair of a MachineInstr* and a virtual register, which + // is the operand of that PHI corresponding to the current basic block. + DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; + + // FIXME: Can these two data structures be combined? Would a std::multimap + // be any better? + + // Stores pairs of predecessor basic blocks and the source registers of + // inserted copy instructions. 
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; + SrcCopySet InsertedSrcCopySet; + + // Maps pairs of predecessor basic blocks and colors to their defining copy + // instructions. + typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> + SrcCopyMap; + SrcCopyMap InsertedSrcCopyMap; + + // Maps inserted destination copy registers to their defining copy + // instructions. + typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; + DestCopyMap InsertedDestCopies; + }; + + struct MIIndexCompare { + MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } + + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); } - } -} -namespace { + LiveIntervals *LI; + }; +} // namespace -/// PreorderSorter - a helper class that is used to sort registers -/// according to the preorder number of their defining blocks -class PreorderSorter { -private: - DenseMap<MachineBasicBlock*, unsigned>& preorder; - MachineRegisterInfo& MRI; - -public: - PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p, - MachineRegisterInfo& M) : preorder(p), MRI(M) { } - - bool operator()(unsigned A, unsigned B) { - if (A == B) - return false; - - MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent(); - MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent(); - - if (preorder[ABlock] < preorder[BBlock]) - return true; - else if (preorder[ABlock] > preorder[BBlock]) - return false; - - return false; - } -}; +STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); +STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); +STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); +char StrongPHIElimination::ID = 0; +INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) + +char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; + +void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); } -/// computeDomForest - compute the subforest of the DomTree corresponding -/// to the defining blocks of the registers in question -std::vector<StrongPHIElimination::DomForestNode*> -StrongPHIElimination::computeDomForest( - std::map<unsigned, MachineBasicBlock*>& regs, - MachineRegisterInfo& MRI) { - // Begin by creating a virtual root node, since the actual results - // may well be a forest. Assume this node has maximum DFS-out number. 
- DomForestNode* VirtualRoot = new DomForestNode(0, 0); - maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL)); - - // Populate a worklist with the registers - std::vector<unsigned> worklist; - worklist.reserve(regs.size()); - for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(), - E = regs.end(); I != E; ++I) - worklist.push_back(I->first); - - // Sort the registers by the DFS-in number of their defining block - PreorderSorter PS(preorder, MRI); - std::sort(worklist.begin(), worklist.end(), PS); - - // Create a "current parent" stack, and put the virtual root on top of it - DomForestNode* CurrentParent = VirtualRoot; - std::vector<DomForestNode*> stack; - stack.push_back(VirtualRoot); - - // Iterate over all the registers in the previously computed order - for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end(); - I != E; ++I) { - unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()]; - MachineBasicBlock* parentBlock = CurrentParent->getReg() ? - MRI.getVRegDef(CurrentParent->getReg())->getParent() : - 0; - - // If the DFS-in number of the register is greater than the DFS-out number - // of the current parent, repeatedly pop the parent stack until it isn't. - while (pre > maxpreorder[parentBlock]) { - stack.pop_back(); - CurrentParent = stack.back(); - - parentBlock = CurrentParent->getReg() ? - MRI.getVRegDef(CurrentParent->getReg())->getParent() : - 0; +static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { + // FIXME: This only needs to check from the first terminator, as only the + // first terminator can use a virtual register. + for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { + assert (RI != MBB->rend()); + MachineInstr *MI = &*RI; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &MO = *OI; + if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) + return &MO; } - - // Now that we've found the appropriate parent, create a DomForestNode for - // this register and attach it to the forest - DomForestNode* child = new DomForestNode(*I, CurrentParent); - - // Push this new node on the "current parent" stack - stack.push_back(child); - CurrentParent = child; } - - // Return a vector containing the children of the virtual root node - std::vector<DomForestNode*> ret; - ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end()); - return ret; + return NULL; } -/// isLiveIn - helper method that determines, from a regno, if a register -/// is live into a block -static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, - LiveIntervals& LI) { - LiveInterval& I = LI.getOrCreateInterval(r); - SlotIndex idx = LI.getMBBStartIdx(MBB); - return I.liveAt(idx); -} +bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + TII = MF.getTarget().getInstrInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<LiveIntervals>(); -/// isLiveOut - help method that determines, from a regno, if a register is -/// live out of a block. 
-static bool isLiveOut(unsigned r, MachineBasicBlock* MBB, - LiveIntervals& LI) { - for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), - E = MBB->succ_end(); PI != E; ++PI) - if (isLiveIn(r, *PI, LI)) - return true; - - return false; -} + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + PHISrcDefs[I].push_back(BBI); -/// interferes - checks for local interferences by scanning a block. The only -/// trick parameter is 'mode' which tells it the relationship of the two -/// registers. 0 - defined in the same block, 1 - first properly dominates -/// second, 2 - second properly dominates first -static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan, - LiveIntervals& LV, unsigned mode) { - MachineInstr* def = 0; - MachineInstr* kill = 0; - - // The code is still in SSA form at this point, so there is only one - // definition per VReg. Thus we can safely use MRI->getVRegDef(). - const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo(); - - bool interference = false; - - // Wallk the block, checking for interferences - for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end(); - MBI != MBE; ++MBI) { - MachineInstr* curr = MBI; - - // Same defining block... - if (mode == 0) { - if (curr == MRI->getVRegDef(a)) { - // If we find our first definition, save it - if (!def) { - def = curr; - // If there's already an unkilled definition, then - // this is an interference - } else if (!kill) { - interference = true; - break; - // If there's a definition followed by a KillInst, then - // they can't interfere - } else { - interference = false; - break; - } - // Symmetric with the above - } else if (curr == MRI->getVRegDef(b)) { - if (!def) { - def = curr; - } else if (!kill) { - interference = true; - break; - } else { - interference = false; - break; - } - // Store KillInsts if they match up with the definition - } else if (curr->killsRegister(a)) { - if (def == MRI->getVRegDef(a)) { - kill = curr; - } else if (curr->killsRegister(b)) { - if (def == MRI->getVRegDef(b)) { - kill = curr; - } - } - } - // First properly dominates second... - } else if (mode == 1) { - if (curr == MRI->getVRegDef(b)) { - // Definition of second without kill of first is an interference - if (!kill) { - interference = true; - break; - // Definition after a kill is a non-interference - } else { - interference = false; - break; - } - // Save KillInsts of First - } else if (curr->killsRegister(a)) { - kill = curr; - } - // Symmetric with the above - } else if (mode == 2) { - if (curr == MRI->getVRegDef(a)) { - if (!kill) { - interference = true; - break; - } else { - interference = false; - break; - } - } else if (curr->killsRegister(b)) { - kill = curr; + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = BBI->getOperand(i); + unsigned SrcReg = SrcMO.getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + PHISrcDefs[DefMI->getParent()].push_back(DefMI); } } } - - return interference; -} -/// processBlock - Determine how to break up PHIs in the current block. Each -/// PHI is broken up by some combination of renaming its operands and inserting -/// copies. This method is responsible for determining which operands receive -/// which treatment. 
-void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - - // Holds names that have been added to a set in any PHI within this block - // before the current one. - std::set<unsigned> ProcessedNames; - - // Iterate over all the PHI nodes in this block - MachineBasicBlock::iterator P = MBB->begin(); - while (P != MBB->end() && P->isPHI()) { - unsigned DestReg = P->getOperand(0).getReg(); - - // Don't both doing PHI elimination for dead PHI's. - if (P->registerDefIsDead(DestReg)) { - ++P; - continue; - } + // Perform a depth-first traversal of the dominator tree, splitting + // interferences amongst PHI-congruence classes. + DenseMap<unsigned, unsigned> CurrentDominatingParent; + DenseMap<unsigned, unsigned> ImmediateDominatingParent; + for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + SplitInterferencesForBasicBlock(*DI->getBlock(), + CurrentDominatingParent, + ImmediateDominatingParent); + } - LiveInterval& PI = LI.getOrCreateInterval(DestReg); - SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex(); - VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; - PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); - - // PHIUnion is the set of incoming registers to the PHI node that - // are going to be renames rather than having copies inserted. This set - // is refinded over the course of this function. UnionedBlocks is the set - // of corresponding MBBs. - std::map<unsigned, MachineBasicBlock*> PHIUnion; - SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks; - - // Iterate over the operands of the PHI node - for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { - unsigned SrcReg = P->getOperand(i-1).getReg(); - - // Don't need to try to coalesce a register with itself. - if (SrcReg == DestReg) { - ProcessedNames.insert(SrcReg); - continue; - } - - // We don't need to insert copies for implicit_defs. - MachineInstr* DefMI = MRI.getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) - ProcessedNames.insert(SrcReg); - - // Check for trivial interferences via liveness information, allowing us - // to avoid extra work later. Any registers that interfere cannot both - // be in the renaming set, so choose one and add copies for it instead. 
- // The conditions are: - // 1) if the operand is live into the PHI node's block OR - // 2) if the PHI node is live out of the operand's defining block OR - // 3) if the operand is itself a PHI node and the original PHI is - // live into the operand's defining block OR - // 4) if the operand is already being renamed for another PHI node - // in this block OR - // 5) if any two operands are defined in the same block, insert copies - // for one of them - if (isLiveIn(SrcReg, P->getParent(), LI) || - isLiveOut(P->getOperand(0).getReg(), - MRI.getVRegDef(SrcReg)->getParent(), LI) || - ( MRI.getVRegDef(SrcReg)->isPHI() && - isLiveIn(P->getOperand(0).getReg(), - MRI.getVRegDef(SrcReg)->getParent(), LI) ) || - ProcessedNames.count(SrcReg) || - UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) { - - // Add a copy for the selected register - MachineBasicBlock* From = P->getOperand(i).getMBB(); - Waiting[From].insert(std::make_pair(SrcReg, DestReg)); - UsedByAnother.insert(SrcReg); - } else { - // Otherwise, add it to the renaming set - PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB())); - UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent()); - } + // Insert copies for all PHI source and destination registers. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + InsertCopiesForPHI(BBI, I); } - - // Compute the dominator forest for the renaming set. This is a forest - // where the nodes are the registers and the edges represent dominance - // relations between the defining blocks of the registers - std::vector<StrongPHIElimination::DomForestNode*> DF = - computeDomForest(PHIUnion, MRI); - - // Walk DomForest to resolve interferences at an inter-block level. This - // will remove registers from the renaming set (and insert copies for them) - // if interferences are found. - std::vector<std::pair<unsigned, unsigned> > localInterferences; - processPHIUnion(P, PHIUnion, DF, localInterferences); - - // If one of the inputs is defined in the same block as the current PHI - // then we need to check for a local interference between that input and - // the PHI. - for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), - E = PHIUnion.end(); I != E; ++I) - if (MRI.getVRegDef(I->first)->getParent() == P->getParent()) - localInterferences.push_back(std::make_pair(I->first, - P->getOperand(0).getReg())); - - // The dominator forest walk may have returned some register pairs whose - // interference cannot be determined from dominator analysis. We now - // examine these pairs for local interferences. 
- for (std::vector<std::pair<unsigned, unsigned> >::iterator I = - localInterferences.begin(), E = localInterferences.end(); I != E; ++I) { - std::pair<unsigned, unsigned> p = *I; - - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - - // Determine the block we need to scan and the relationship between - // the two registers - MachineBasicBlock* scan = 0; - unsigned mode = 0; - if (MRI.getVRegDef(p.first)->getParent() == - MRI.getVRegDef(p.second)->getParent()) { - scan = MRI.getVRegDef(p.first)->getParent(); - mode = 0; // Same block - } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(), - MRI.getVRegDef(p.second)->getParent())) { - scan = MRI.getVRegDef(p.second)->getParent(); - mode = 1; // First dominates second - } else { - scan = MRI.getVRegDef(p.first)->getParent(); - mode = 2; // Second dominates first - } - - // If there's an interference, we need to insert copies - if (interferes(p.first, p.second, scan, LI, mode)) { - // Insert copies for First - for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { - if (P->getOperand(i-1).getReg() == p.first) { - unsigned SrcReg = p.first; - MachineBasicBlock* From = P->getOperand(i).getMBB(); - - Waiting[From].insert(std::make_pair(SrcReg, - P->getOperand(0).getReg())); - UsedByAnother.insert(SrcReg); - - PHIUnion.erase(SrcReg); - } - } + } + + // FIXME: Preserve the equivalence classes during copy insertion and use + // the preversed equivalence classes instead of recomputing them. + RegNodeMap.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + unsigned SrcReg = BBI->getOperand(i).getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); } } - - // Add the renaming set for this PHI node to our overall renaming information - for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), - QE = PHIUnion.end(); QI != QE; ++QI) { - DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> " - << P->getOperand(0).getReg() << "\n"); - } - - RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); - - // Remember which registers are already renamed, so that we don't try to - // rename them for another PHI node in this block - for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), - E = PHIUnion.end(); I != E; ++I) - ProcessedNames.insert(I->first); - - ++P; } -} -/// processPHIUnion - Take a set of candidate registers to be coalesced when -/// decomposing the PHI instruction. Use the DominanceForest to remove the ones -/// that are known to interfere, and flag others that need to be checked for -/// local interferences. 
-void StrongPHIElimination::processPHIUnion(MachineInstr* Inst, - std::map<unsigned, MachineBasicBlock*>& PHIUnion, - std::vector<StrongPHIElimination::DomForestNode*>& DF, - std::vector<std::pair<unsigned, unsigned> >& locals) { - - std::vector<DomForestNode*> worklist(DF.begin(), DF.end()); - SmallPtrSet<DomForestNode*, 4> visited; - - // Code is still in SSA form, so we can use MRI::getVRegDef() - MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo(); - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - unsigned DestReg = Inst->getOperand(0).getReg(); - - // DF walk on the DomForest - while (!worklist.empty()) { - DomForestNode* DFNode = worklist.back(); - - visited.insert(DFNode); - - bool inserted = false; - for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end(); - CI != CE; ++CI) { - DomForestNode* child = *CI; - - // If the current node is live-out of the defining block of one of its - // children, insert a copy for it. NOTE: The paper actually calls for - // a more elaborate heuristic for determining whether to insert copies - // for the child or the parent. In the interest of simplicity, we're - // just always choosing the parent. - if (isLiveOut(DFNode->getReg(), - MRI.getVRegDef(child->getReg())->getParent(), LI)) { - // Insert copies for parent - for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) { - if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) { - unsigned SrcReg = DFNode->getReg(); - MachineBasicBlock* From = Inst->getOperand(i).getMBB(); - - Waiting[From].insert(std::make_pair(SrcReg, DestReg)); - UsedByAnother.insert(SrcReg); - - PHIUnion.erase(SrcReg); - } - } - - // If a node is live-in to the defining block of one of its children, but - // not live-out, then we need to scan that block for local interferences. - } else if (isLiveIn(DFNode->getReg(), - MRI.getVRegDef(child->getReg())->getParent(), LI) || - MRI.getVRegDef(DFNode->getReg())->getParent() == - MRI.getVRegDef(child->getReg())->getParent()) { - // Add (p, c) to possible local interferences - locals.push_back(std::make_pair(DFNode->getReg(), child->getReg())); + DenseMap<unsigned, unsigned> RegRenamingMap; + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + while (BBI != BBE && BBI->isPHI()) { + MachineInstr *PHI = BBI; + + assert(PHI->getNumOperands() > 0); + + unsigned SrcReg = PHI->getOperand(1).getReg(); + unsigned SrcColor = getRegColor(SrcReg); + unsigned NewReg = RegRenamingMap[SrcColor]; + if (!NewReg) { + NewReg = SrcReg; + RegRenamingMap[SrcColor] = SrcReg; } - - if (!visited.count(child)) { - worklist.push_back(child); - inserted = true; + MergeLIsAndRename(SrcReg, NewReg); + + unsigned DestReg = PHI->getOperand(0).getReg(); + if (!InsertedDestCopies.count(DestReg)) + MergeLIsAndRename(DestReg, NewReg); + + for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { + unsigned SrcReg = PHI->getOperand(i).getReg(); + MergeLIsAndRename(SrcReg, NewReg); } + + ++BBI; + LI->RemoveMachineInstrFromMaps(PHI); + PHI->eraseFromParent(); + Changed = true; } - - if (!inserted) worklist.pop_back(); } -} -/// ScheduleCopies - Insert copies into predecessor blocks, scheduling -/// them properly so as to avoid the 'lost copy' and the 'virtual swap' -/// problems. -/// -/// Based on "Practical Improvements to the Construction and Destruction -/// of Static Single Assignment Form" by Briggs, et al. 
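For context on the two hazards the removed ScheduleCopies comment refers to: the copies a PHI expands into in a predecessor are logically parallel, so emitting them in an arbitrary order can clobber a value that another copy still needs (the swap problem), and a copy whose source stays live out of the block must have its value saved first (the lost-copy problem, which the removed code handled by copying into a temporary when isLiveOut held). A tiny host-side C++ illustration of the swap case, purely for exposition:

  #include <cassert>

  int main() {
    // Two PHIs in a shared successor request, in parallel: a' = b and b' = a.
    int a = 1, b = 2;

    // Emitting "a = b; b = a;" naively would leave a == b == 2 and lose the
    // original value of a. Scheduling the copies through a temporary breaks
    // the cycle, which is what the Briggs-style algorithm automates:
    int t = a;
    a = b;
    b = t;

    assert(a == 2 && b == 1);
    return 0;
  }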
-void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, - std::set<unsigned>& pushed) { - // FIXME: This function needs to update LiveIntervals - std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB]; - - std::multimap<unsigned, unsigned> worklist; - std::map<unsigned, unsigned> map; - - // Setup worklist of initial copies - for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(), - E = copy_set.end(); I != E; ) { - map.insert(std::make_pair(I->first, I->first)); - map.insert(std::make_pair(I->second, I->second)); - - if (!UsedByAnother.count(I->second)) { - worklist.insert(*I); - - // Avoid iterator invalidation - std::multimap<unsigned, unsigned>::iterator OI = I; - ++I; - copy_set.erase(OI); - } else { - ++I; + // Due to the insertion of copies to split live ranges, the live intervals are + // guaranteed to not overlap, except in one case: an original PHI source and a + // PHI destination copy. In this case, they have the same value and thus don't + // truly intersect, so we merge them into the value live at that point. + // FIXME: Is there some better way we can handle this? + for (DestCopyMap::iterator I = InsertedDestCopies.begin(), + E = InsertedDestCopies.end(); I != E; ++I) { + unsigned DestReg = I->first; + unsigned DestColor = getRegColor(DestReg); + unsigned NewReg = RegRenamingMap[DestColor]; + + LiveInterval &DestLI = LI->getInterval(DestReg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + assert(DestLI.ranges.size() == 1 + && "PHI destination copy's live interval should be a single live " + "range from the beginning of the BB to the copy instruction."); + LiveRange *DestLR = DestLI.begin(); + VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); + if (!NewVNI) { + NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); + MachineInstr *CopyInstr = I->second; + CopyInstr->getOperand(1).setIsKill(true); } + + LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); + NewLI.addRange(NewLR); + + LI->removeInterval(DestReg); + MRI->replaceRegWith(DestReg, NewReg); } - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - MachineFunction* MF = MBB->getParent(); - MachineRegisterInfo& MRI = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - - SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests; - - // Iterate over the worklist, inserting copies - while (!worklist.empty() || !copy_set.empty()) { - while (!worklist.empty()) { - std::multimap<unsigned, unsigned>::iterator WI = worklist.begin(); - std::pair<unsigned, unsigned> curr = *WI; - worklist.erase(WI); - - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first); - - if (isLiveOut(curr.second, MBB, LI)) { - // Create a temporary - unsigned t = MF->getRegInfo().createVirtualRegister(RC); - - // Insert copy from curr.second to a temporary at - // the Phi defining curr.second - MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); - BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY), - t).addReg(curr.second); - DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t - << "\n"); - - // Push temporary on Stacks - Stacks[curr.second].push_back(t); - - // Insert curr.second in pushed - pushed.insert(curr.second); - - // Create a live interval for this temporary - InsertedPHIDests.push_back(std::make_pair(t, --PI)); - } - - // Insert copy from map[curr.first] to curr.second - BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), 
curr.second).addReg(map[curr.first]); - map[curr.first] = curr.second; - DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " - << curr.second << "\n"); - - // Push this copy onto InsertedPHICopies so we can - // update LiveIntervals with it. - MachineBasicBlock::iterator MI = MBB->getFirstTerminator(); - InsertedPHIDests.push_back(std::make_pair(curr.second, --MI)); - - // If curr.first is a destination in copy_set... - for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(), - E = copy_set.end(); I != E; ) - if (curr.first == I->second) { - std::pair<unsigned, unsigned> temp = *I; - worklist.insert(temp); - - // Avoid iterator invalidation - std::multimap<unsigned, unsigned>::iterator OI = I; - ++I; - copy_set.erase(OI); - - break; - } else { - ++I; - } - } - - if (!copy_set.empty()) { - std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin(); - std::pair<unsigned, unsigned> curr = *CI; - worklist.insert(curr); - copy_set.erase(CI); - - LiveInterval& I = LI.getInterval(curr.second); - MachineBasicBlock::iterator term = MBB->getFirstTerminator(); - SlotIndex endIdx = SlotIndex(); - if (term != MBB->end()) - endIdx = LI.getInstructionIndex(term); - else - endIdx = LI.getMBBEndIdx(MBB); - - if (I.liveAt(endIdx)) { - const TargetRegisterClass *RC = - MF->getRegInfo().getRegClass(curr.first); - - // Insert a copy from dest to a new temporary t at the end of b - unsigned t = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), t).addReg(curr.second); - map[curr.second] = t; - - MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); - InsertedPHIDests.push_back(std::make_pair(t, --TI)); + + // Adjust the live intervals of all PHI source registers to handle the case + // where the PHIs in successor blocks were the only later uses of the source + // register. + for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), + E = InsertedSrcCopySet.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->first; + unsigned SrcReg = I->second; + if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) + SrcReg = RenamedRegister; + + LiveInterval &SrcLI = LI->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { + isLiveOut = true; + break; } } + + if (isLiveOut) + continue; + + MachineOperand *LastUse = findLastUse(MBB, SrcReg); + assert(LastUse); + SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); + SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + LastUse->setIsKill(true); } - - // Renumber the instructions so that we can perform the index computations - // needed to create new live intervals. - LI.renumber(); - - // For copies that we inserted at the ends of predecessors, we construct - // live intervals. This is pretty easy, since we know that the destination - // register cannot have be in live at that point previously. We just have - // to make sure that, for registers that serve as inputs to more than one - // PHI, we don't create multiple overlapping live intervals. 
- std::set<unsigned> RegHandled; - for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I = - InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { - if (RegHandled.insert(I->first).second) { - LiveInterval& Int = LI.getOrCreateInterval(I->first); - SlotIndex instrIdx = LI.getInstructionIndex(I->second); - if (Int.liveAt(instrIdx.getDefIndex())) - Int.removeRange(instrIdx.getDefIndex(), - LI.getMBBEndIdx(I->second->getParent()).getNextSlot(), - true); - - LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); - R.valno->setCopy(I->second); - R.valno->def = LI.getInstructionIndex(I->second).getDefIndex(); - } + + LI->renumber(); + + Allocator.Reset(); + RegNodeMap.clear(); + PHISrcDefs.clear(); + InsertedSrcCopySet.clear(); + InsertedSrcCopyMap.clear(); + InsertedDestCopies.clear(); + + return Changed; +} + +void StrongPHIElimination::addReg(unsigned Reg) { + if (RegNodeMap.count(Reg)) + return; + RegNodeMap[Reg] = new (Allocator) Node(Reg); +} + +StrongPHIElimination::Node* +StrongPHIElimination::Node::getLeader() { + Node *N = this; + Node *Parent = parent.getPointer(); + Node *Grandparent = Parent->parent.getPointer(); + + while (Parent != Grandparent) { + N->parent.setPointer(Grandparent); + N = Grandparent; + Parent = Parent->parent.getPointer(); + Grandparent = Parent->parent.getPointer(); } + + return Parent; } -/// InsertCopies - insert copies into MBB and all of its successors -void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, - SmallPtrSet<MachineBasicBlock*, 16>& visited) { - MachineBasicBlock* MBB = MDTN->getBlock(); - visited.insert(MBB); - - std::set<unsigned> pushed; - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - // Rewrite register uses from Stacks - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - if (I->isPHI()) - continue; - - for (unsigned i = 0; i < I->getNumOperands(); ++i) - if (I->getOperand(i).isReg() && - Stacks[I->getOperand(i).getReg()].size()) { - // Remove the live range for the old vreg. - LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); - LiveInterval::iterator OldLR = - OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex()); - if (OldLR != OldInt.end()) - OldInt.removeRange(*OldLR, true); - - // Change the register - I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back()); - - // Add a live range for the new vreg - LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); - VNInfo* FirstVN = *Int.vni_begin(); - FirstVN->setHasPHIKill(false); - LiveRange LR (LI.getMBBStartIdx(I->getParent()), - LI.getInstructionIndex(I).getUseIndex().getNextSlot(), - FirstVN); - - Int.addRange(LR); - } - } - - // Schedule the copies for this block - ScheduleCopies(MBB, pushed); - - // Recur down the dominator tree. 
- for (MachineDomTreeNode::iterator I = MDTN->begin(), - E = MDTN->end(); I != E; ++I) - if (!visited.count((*I)->getBlock())) - InsertCopies(*I, visited); - - // As we exit this block, pop the names we pushed while processing it - for (std::set<unsigned>::iterator I = pushed.begin(), - E = pushed.end(); I != E; ++I) - Stacks[*I].pop_back(); +unsigned StrongPHIElimination::getRegColor(unsigned Reg) { + DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); + if (RI == RegNodeMap.end()) + return 0; + Node *Node = RI->second; + if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) + return 0; + return Node->getLeader()->value; } -bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, - unsigned secondary) { - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - LiveInterval& LHS = LI.getOrCreateInterval(primary); - LiveInterval& RHS = LI.getOrCreateInterval(secondary); - - LI.renumber(); - - DenseMap<VNInfo*, VNInfo*> VNMap; - for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - LiveRange R = *I; - - SlotIndex Start = R.start; - SlotIndex End = R.end; - if (LHS.getLiveRangeContaining(Start)) - return false; - - if (LHS.getLiveRangeContaining(End)) - return false; - - LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R); - if (RI != LHS.end() && RI->start < End) - return false; +void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { + Node *Node1 = RegNodeMap[Reg1]->getLeader(); + Node *Node2 = RegNodeMap[Reg2]->getLeader(); + + if (Node1->rank > Node2->rank) { + Node2->parent.setPointer(Node1->getLeader()); + } else if (Node1->rank < Node2->rank) { + Node1->parent.setPointer(Node2->getLeader()); + } else if (Node1 != Node2) { + Node2->parent.setPointer(Node1->getLeader()); + Node1->rank++; } - - for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - LiveRange R = *I; - VNInfo* OldVN = R.valno; - VNInfo*& NewVN = VNMap[OldVN]; - if (!NewVN) { - NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator()); - } - - LiveRange LR (R.start, R.end, NewVN); - LHS.addRange(LR); +} + +void StrongPHIElimination::isolateReg(unsigned Reg) { + Node *Node = RegNodeMap[Reg]; + Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); +} + +unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { + assert(PHI->isPHI()); + + unsigned DestReg = PHI->getOperand(0).getReg(); + Node *DestNode = RegNodeMap[DestReg]; + if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) + return 0; + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); + if (SrcColor) + return SrcColor; } - - LI.removeInterval(RHS.reg); - - return true; + return 0; } -bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - - // Compute DFS numbers of each block - computeDFS(Fn); - - // Determine which phi node operands need copies - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && I->begin()->isPHI()) - processBlock(I); - - // Break interferences where two different phis want to coalesce - // in the same register. 
- std::set<unsigned> seen; - typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > - RenameSetType; - for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); - I != E; ++I) { - for (std::map<unsigned, MachineBasicBlock*>::iterator - OI = I->second.begin(), OE = I->second.end(); OI != OE; ) { - if (!seen.count(OI->first)) { - seen.insert(OI->first); - ++OI; +void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { + assert(PHI->isPHI()); + Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; + Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); +} + +/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any +/// interferences found between registers in the same congruence class. It +/// takes two DenseMaps as arguments that it also updates: +/// +/// 1) CurrentDominatingParent, which maps a color to the register in that +/// congruence class whose definition was most recently seen. +/// +/// 2) ImmediateDominatingParent, which maps a register to the register in the +/// same congruence class that most immediately dominates it. +/// +/// This function assumes that it is being called in a depth-first traversal +/// of the dominator tree. +/// +/// The algorithm used here is a generalization of the dominance-based SSA test +/// for two variables. If there are variables a_1, ..., a_n such that +/// +/// def(a_1) dom ... dom def(a_n), +/// +/// then we can test for an interference between any two a_i by only using O(n) +/// interference tests between pairs of variables. If i < j and a_i and a_j +/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). +/// Thus, in order to test for an interference involving a_i, we need only check +/// for a potential interference with a_i+1. +/// +/// This method can be generalized to arbitrary sets of variables by performing +/// a depth-first traversal of the dominator tree. As we traverse down a branch +/// of the dominator tree, we keep track of the current dominating variable and +/// only perform an interference test with that variable. However, when we go to +/// another branch of the dominator tree, the definition of the current dominating +/// variable may no longer dominate the current block. In order to correct this, +/// we need to use a stack of past choices of the current dominating variable +/// and pop from this stack until we find a variable whose definition actually +/// dominates the current block. +/// +/// There will be one push on this stack for each variable that has become the +/// current dominating variable, so instead of using an explicit stack we can +/// simply associate the previous choice for a current dominating variable with +/// the new choice. This works better in our implementation, where we test for +/// interference in multiple distinct sets at once. +void +StrongPHIElimination::SplitInterferencesForBasicBlock( + MachineBasicBlock &MBB, + DenseMap<unsigned, unsigned> &CurrentDominatingParent, + DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { + // Sort defs by their order in the original basic block, as the code below + // assumes that it is processing definitions in dominance order. 
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; + std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); + + for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), + BBE = DefInstrs.end(); BBI != BBE; ++BBI) { + for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), + E = (*BBI)->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + + // FIXME: This would be faster if it were possible to bail out of checking + // an instruction's operands after the explicit defs, but this is incorrect + // for variadic instructions, which may appear before register allocation + // in the future. + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned DestReg = MO.getReg(); + if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // If the virtual register being defined is not used in any PHI or has + // already been isolated, then there are no more interferences to check. + unsigned DestColor = getRegColor(DestReg); + if (!DestColor) + continue; + + // The input to this pass sometimes is not in SSA form in every basic + // block, as some virtual registers have redefinitions. We could eliminate + // this by fixing the passes that generate the non-SSA code, or we could + // handle it here by tracking defining machine instructions rather than + // virtual registers. For now, we just handle the situation conservatively + // in a way that will possibly lead to false interferences. + unsigned &CurrentParent = CurrentDominatingParent[DestColor]; + unsigned NewParent = CurrentParent; + if (NewParent == DestReg) + continue; + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + + // If NewParent is nonzero, then its definition dominates the current + // instruction, so it is only necessary to check for the liveness of + // NewParent in order to check for an interference. + if (NewParent + && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { + // If there is an interference, always isolate the new register. This + // could be improved by using a heuristic that decides which of the two + // registers to isolate. + isolateReg(DestReg); + CurrentParent = NewParent; } else { - Waiting[OI->second].insert(std::make_pair(OI->first, I->first)); - unsigned reg = OI->first; - ++OI; - I->second.erase(reg); - DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first - << "\n"); + // If there is no interference, update ImmediateDominatingParent and set + // the CurrentDominatingParent for this color to the current register. 
+ ImmediateDominatingParent[DestReg] = NewParent; + CurrentParent = DestReg; } } } - - // Insert copies - // FIXME: This process should probably preserve LiveIntervals - SmallPtrSet<MachineBasicBlock*, 16> visited; - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - InsertCopies(MDT.getRootNode(), visited); - - // Perform renaming - for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); - I != E; ++I) - while (I->second.size()) { - std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); - - DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); - - if (SI->first != I->first) { - if (mergeLiveIntervals(I->first, SI->first)) { - Fn.getRegInfo().replaceRegWith(SI->first, I->first); - - if (RenameSets.count(SI->first)) { - I->second.insert(RenameSets[SI->first].begin(), - RenameSets[SI->first].end()); - RenameSets.erase(SI->first); - } - } else { - // Insert a last-minute copy if a conflict was detected. - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); - BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), I->first).addReg(SI->first); - - LI.renumber(); - - LiveInterval& Int = LI.getOrCreateInterval(I->first); - SlotIndex instrIdx = - LI.getInstructionIndex(--SI->second->getFirstTerminator()); - if (Int.liveAt(instrIdx.getDefIndex())) - Int.removeRange(instrIdx.getDefIndex(), - LI.getMBBEndIdx(SI->second).getNextSlot(), true); - - LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, - --SI->second->getFirstTerminator()); - R.valno->setCopy(--SI->second->getFirstTerminator()); - R.valno->def = instrIdx.getDefIndex(); - - DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> " - << I->first << "\n"); - } + + // We now walk the PHIs in successor blocks and check for interferences. This + // is necesary because the use of a PHI's operands are logically contained in + // the predecessor block. The def of a PHI's destination register is processed + // along with the other defs in a basic block. + + CurrentPHIForColor.clear(); + + for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + MachineInstr *PHI = BBI; + + // If a PHI is already isolated, either by being isolated directly or + // having all of its operands isolated, ignore it. + unsigned Color = getPHIColor(PHI); + if (!Color) + continue; + + // Find the index of the PHI operand that corresponds to this basic block. + unsigned PredIndex; + for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { + if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) + break; } - - LiveInterval& Int = LI.getOrCreateInterval(I->first); - const LiveRange* LR = - Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second)); - LR->valno->setHasPHIKill(true); - - I->second.erase(SI->first); + assert(PredIndex < PHI->getNumOperands()); + unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. 
+ unsigned &CurrentParent = CurrentDominatingParent[Color]; + unsigned NewParent = CurrentParent; + while (NewParent + && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + CurrentParent = NewParent; + + // If there is an interference with a register, always isolate the + // register rather than the PHI. It is also possible to isolate the + // PHI, but that introduces copies for all of the registers involved + // in that PHI. + if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) + && NewParent != PredOperandReg) + isolateReg(NewParent); + + std::pair<MachineInstr*, unsigned> + &CurrentPHI = CurrentPHIForColor[Color]; + + // If two PHIs have the same operand from every shared predecessor, then + // they don't actually interfere. Otherwise, isolate the current PHI. This + // could possibly be improved, e.g. we could isolate the PHI with the + // fewest operands. + if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) + isolatePHI(PHI); + else + CurrentPHI = std::make_pair(PHI, PredOperandReg); } - - // Remove PHIs - std::vector<MachineInstr*> phis; - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); - BI != BE; ++BI) - if (BI->isPHI()) - phis.push_back(BI); } - - for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end(); - I != E; ) { - MachineInstr* PInstr = *(I++); - - // If this is a dead PHI node, then remove it from LiveIntervals. - unsigned DestReg = PInstr->getOperand(0).getReg(); - LiveInterval& PI = LI.getInterval(DestReg); - if (PInstr->registerDefIsDead(DestReg)) { - if (PI.containsOneValue()) { - LI.removeInterval(DestReg); +} + +void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, + MachineBasicBlock *MBB) { + assert(PHI->isPHI()); + ++NumPHIsLowered; + unsigned PHIColor = getPHIColor(PHI); + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = PHI->getOperand(i); + + // If a source is defined by an implicit def, there is no need to insert a + // copy in the predecessor. + if (SrcMO.isUndef()) + continue; + + unsigned SrcReg = SrcMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); + unsigned SrcColor = getRegColor(SrcReg); + + // If neither the PHI nor the operand were isolated, then we only need to + // set the phi-kill flag on the VNInfo at this PHI. + if (PHIColor && SrcColor == PHIColor) { + LiveInterval &SrcInterval = LI->getInterval(SrcReg); + SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); + VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex()); + assert(SrcVNI); + SrcVNI->setHasPHIKill(true); + continue; + } + + unsigned CopyReg = 0; + if (PHIColor) { + SrcCopyMap::const_iterator I + = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); + CopyReg + = I != InsertedSrcCopyMap.end() ? 
I->second->getOperand(0).getReg() : 0; + } + + if (!CopyReg) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + CopyReg = MRI->createVirtualRegister(RC); + + MachineBasicBlock::iterator + CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); + unsigned SrcSubReg = SrcMO.getSubReg(); + MachineInstr *CopyInstr = BuildMI(*PredBB, + CopyInsertPoint, + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + CopyReg).addReg(SrcReg, 0, SrcSubReg); + LI->InsertMachineInstrInMaps(CopyInstr); + ++NumSrcCopiesInserted; + + // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for + // the newly added range. + LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); + InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); + + addReg(CopyReg); + if (PHIColor) { + unionRegs(PHIColor, CopyReg); + assert(getRegColor(CopyReg) != CopyReg); } else { - SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex(); - PI.removeRange(*PI.getLiveRangeContaining(idx), true); - } - } else { - // Trim live intervals of input registers. They are no longer live into - // this block if they died after the PHI. If they lived after it, don't - // trim them because they might have other legitimate uses. - for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) { - unsigned reg = PInstr->getOperand(i).getReg(); - - MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB(); - LiveInterval& InputI = LI.getInterval(reg); - if (MBB != PInstr->getParent() && - InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && - InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex())) - InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), - LI.getInstructionIndex(PInstr), - true); + PHIColor = CopyReg; + assert(getRegColor(CopyReg) == CopyReg); } - - // If the PHI is not dead, then the valno defined by the PHI - // now has an unknown def. - SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex(); - const LiveRange* PLR = PI.getLiveRangeContaining(idx); - PLR->valno->setIsPHIDef(true); - LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), - PLR->start, PLR->valno); - PI.addRange(R); + + if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) + InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; } - - LI.RemoveMachineInstrFromMaps(PInstr); - PInstr->eraseFromParent(); + + SrcMO.setReg(CopyReg); + + // If SrcReg is not live beyond the PHI, trim its interval so that it is no + // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are + // processed later, but this is still correct to do at this point because we + // never rely on LiveIntervals being correct while inserting copies. + // FIXME: Should this just count uses at PHIs like the normal PHIElimination + // pass does? + LiveInterval &SrcLI = LI->getInterval(SrcReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); + if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) + SrcLI.removeRange(MBBStartIndex, PHIIndex, true); } - - LI.renumber(); - - return true; + + unsigned DestReg = PHI->getOperand(0).getReg(); + unsigned DestColor = getRegColor(DestReg); + + if (PHIColor && DestColor == PHIColor) { + LiveInterval &DestLI = LI->getInterval(DestReg); + + // Set the phi-def flag for the VN at this PHI. 
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + assert(DestVNI); + DestVNI->setIsPHIDef(true); + + // Prior to PHI elimination, the live ranges of PHIs begin at their defining + // instruction. After PHI elimination, PHI instructions are replaced by VNs + // with the phi-def flag set, and the live ranges of these VNs start at the + // beginning of the basic block. + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + DestVNI->def = MBBStartIndex; + DestLI.addRange(LiveRange(MBBStartIndex, + PHIIndex.getDefIndex(), + DestVNI)); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(DestReg); + unsigned CopyReg = MRI->createVirtualRegister(RC); + + MachineInstr *CopyInstr = BuildMI(*MBB, + MBB->SkipPHIsAndLabels(MBB->begin()), + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + DestReg).addReg(CopyReg); + LI->InsertMachineInstrInMaps(CopyInstr); + PHI->getOperand(0).setReg(CopyReg); + ++NumDestCopiesInserted; + + // Add the region from the beginning of MBB to the copy instruction to + // CopyReg's live interval, and give the VNInfo the phidef flag. + LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); + VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, + CopyInstr, + LI->getVNInfoAllocator()); + CopyVNI->setIsPHIDef(true); + CopyLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getDefIndex(), + CopyVNI)); + + // Adjust DestReg's live interval to adjust for its new definition at + // CopyInstr. + LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + assert(DestVNI); + DestVNI->def = DestCopyIndex.getDefIndex(); + + InsertedDestCopies[CopyReg] = CopyInstr; +} + +void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { + if (Reg == NewReg) + return; + + LiveInterval &OldLI = LI->getInterval(Reg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + // Merge the live ranges of the two registers. + DenseMap<VNInfo*, VNInfo*> VNMap; + for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); + LRI != LRE; ++LRI) { + LiveRange OldLR = *LRI; + VNInfo *OldVN = OldLR.valno; + + VNInfo *&NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); + VNMap[OldVN] = NewVN; + } + + LiveRange LR(OldLR.start, OldLR.end, NewVN); + NewLI.addRange(LR); + } + + // Remove the LiveInterval for the register being renamed and replace all + // of its defs and uses with the new register. 
+ LI->removeInterval(Reg); + MRI->replaceRegWith(Reg, NewReg); } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index a815b364d54e..04d3d311b416 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -350,7 +350,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isDef()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); @@ -459,15 +459,19 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (TailDuplicateSize.getNumOccurrences() == 0 && + MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; if (PreRegAlloc) { - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches. - if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch()) + if (TailBB->empty()) + return false; + const TargetInstrDesc &TID = TailBB->back().getDesc(); + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches and returns. + if (!TID.isIndirectBranch() && !TID.isReturn()) return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there @@ -500,9 +504,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; } - // Heuristically, don't tail-duplicate calls if it would expand code size, - // as it's less likely to be worth the extra cost. - if (InstrCount > 1 && HasCall) + // Don't tail-duplicate calls before register allocation. Calls presents a + // barrier to register allocation so duplicating them may end up increasing + // spills. + if (InstrCount > 1 && (PreRegAlloc && HasCall)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 6e4a0d837ecd..15340a3f1084 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -22,13 +22,18 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<bool> DisableHazardRecognizer( + "disable-sched-hazard", cl::Hidden, cl::init(false), + cl::desc("Disable hazard detection during preRA scheduling")); + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. 
void @@ -135,7 +140,7 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isPredicable()) return false; - + for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { if (TID.OpInfo[i].isPredicate()) { MachineOperand &MO = MI->getOperand(i); @@ -166,8 +171,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } -bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const { +bool +TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } @@ -252,9 +259,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - Flags, /*Offset=*/0, - MFI.getObjectSize(FI), + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -329,8 +336,13 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, const TargetInstrDesc &TID = MI->getDesc(); // Avoid instructions obviously unsafe for remat. - if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() || - TID.mayStore()) + if (TID.isNotDuplicable() || TID.mayStore() || + MI->hasUnmodeledSideEffects()) + return false; + + // Don't remat inline asm. We have no idea how expensive it is + // even if it's side effect free. + if (MI->isInlineAsm()) return false; // Avoid instructions which load from potentially varying memory. @@ -414,8 +426,24 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, return false; } +// Provide a global flag for disabling the PreRA hazard recognizer that targets +// may choose to honor. +bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const { + return !DisableHazardRecognizer; +} + +// Default implementation of CreateTargetRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + // Default implementation of CreateTargetPostRAHazardRecognizer. 
ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { - return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f1e10eec724c..0b7bd98cc692 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -29,10 +29,12 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" using namespace llvm; using namespace dwarf; @@ -45,81 +47,81 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TargetLoweringObjectFile::Initialize(Ctx, TM); BSSSection = - getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); TextSection = - getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | - MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); DataSection = - getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getDataRel()); ReadOnlySection = - getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, SectionKind::getReadOnly()); TLSDataSection = - getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, + getContext().getELFSection(".tdata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, SectionKind::getThreadData()); TLSBSSSection = - getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, + getContext().getELFSection(".tbss", ELF::SHT_NOBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, SectionKind::getThreadBSS()); DataRelSection = - getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRel()); DataRelLocalSection = - getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRelLocal()); DataRelROSection = - getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getReadOnlyWithRel()); 
DataRelROLocalSection = - getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getReadOnlyWithRelLocal()); MergeableConst4Section = - getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, SectionKind::getMergeableConst4()); MergeableConst8Section = - getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, SectionKind::getMergeableConst8()); MergeableConst16Section = - getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, SectionKind::getMergeableConst16()); StaticCtorSection = - getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".ctors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRel()); StaticDtorSection = - getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".dtors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRel()); // Exception Handling Sections. @@ -129,50 +131,50 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. LSDASection = - getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - // Debug Info Sections. 
DwarfAbbrevSection = - getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfInfoSection = - getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfLineSection = - getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfFrameSection = - getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfPubNamesSection = - getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfPubTypesSection = - getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfStrSection = - getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfLocSection = - getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfARangesSection = - getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfRangesSection = - getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfMacroInfoSection = - getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); } +const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { + return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getDataRel()); +} static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { @@ -208,18 +210,18 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { static unsigned getELFSectionType(StringRef Name, SectionKind K) { if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; + return ELF::SHT_INIT_ARRAY; if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + return ELF::SHT_FINI_ARRAY; if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; + return ELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; + return ELF::SHT_NOBITS; - return MCSectionELF::SHT_PROGBITS; + return ELF::SHT_PROGBITS; } @@ -228,24 +230,24 @@ getELFSectionFlags(SectionKind K) { unsigned Flags = 0; if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; + Flags |= ELF::SHF_ALLOC; if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; + Flags |= ELF::SHF_EXECINSTR; if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; + Flags |= ELF::SHF_WRITE; if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; + Flags |= ELF::SHF_TLS; // K.isMergeableConst() is left out to honour PR4650 if (K.isMergeableCString() || K.isMergeableConst4() || 
K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; + Flags |= ELF::SHF_MERGE; if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; + Flags |= ELF::SHF_STRINGS; return Flags; } @@ -261,23 +263,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return getContext().getELFSection(SectionName, getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + getELFSectionFlags(Kind), Kind); } /// getSectionPrefixForGlobal - Return the section prefix name used by options @@ -307,7 +293,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, bool EmitUniquedSection; if (Kind.isText()) EmitUniquedSection = TM.getFunctionSections(); - else + else EmitUniquedSection = TM.getDataSections(); // If this global is linkonce/weak and the target handles this by emitting it @@ -315,19 +301,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if ((GV->isWeakForLinker() || EmitUniquedSection) && !Kind.isCommon() && !Kind.isBSS()) { const char *Prefix; - if (GV->isWeakForLinker()) - Prefix = getSectionPrefixForUniqueGlobal(Kind); - else { - assert(EmitUniquedSection); - Prefix = getSectionPrefixForGlobal(Kind); - } + Prefix = getSectionPrefixForGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); Name.append(Sym->getName().begin(), Sym->getName().end()); + StringRef Group = ""; + unsigned Flags = getELFSectionFlags(Kind); + if (GV->isWeakForLinker()) { + Group = Sym->getName(); + Flags |= ELF::SHF_GROUP; + } + return getContext().getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); + Flags, Kind, 0, Group); } if (Kind.isText()) return TextSection; @@ -352,10 +340,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, std::string Name = SizeSpec + utostr(Align); - return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | + ELF::SHF_MERGE | + ELF::SHF_STRINGS, Kind); } @@ -450,7 +438,16 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHSymbolGlobal = true; IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - + + Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); + if (T.getOS() == Triple::Darwin) { + unsigned MajNum = T.getDarwinMajorNumber(); + if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger + CommDirectiveSupportsAlignment = false; + if (MajNum > 9) // 10.6 SnowLeopard + IsFunctionEHSymbolGlobal = false; + } + TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection // .text @@ -469,20 +466,20 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, = getContext().getMachOSection("__DATA", "__thread_bss", 
MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, SectionKind::getThreadBSS()); - + // TODO: Verify datarel below. TLSTLVSection // .tlv = getContext().getMachOSection("__DATA", "__thread_vars", MCSectionMachO::S_THREAD_LOCAL_VARIABLES, SectionKind::getDataRel()); - + TLSThreadInitSection = getContext().getMachOSection("__DATA", "__thread_init", MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, SectionKind::getDataRel()); - + CStringSection // .cstring - = getContext().getMachOSection("__TEXT", "__cstring", + = getContext().getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, SectionKind::getMergeable1ByteCString()); UStringSection @@ -493,7 +490,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, MCSectionMachO::S_4BYTE_LITERALS, SectionKind::getMergeableConst4()); EightByteConstantSection // .literal8 - = getContext().getMachOSection("__TEXT", "__literal8", + = getContext().getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, SectionKind::getMergeableConst8()); @@ -517,14 +514,14 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, SectionKind::getText()); ConstTextCoalSection - = getContext().getMachOSection("__TEXT", "__const_coal", + = getContext().getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, SectionKind::getReadOnly()); ConstDataSection // .const_data = getContext().getMachOSection("__DATA", "__const", 0, SectionKind::getReadOnlyWithRel()); DataCoalSection - = getContext().getMachOSection("__DATA","__datacoal_nt", + = getContext().getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, SectionKind::getDataRel()); DataCommonSection @@ -534,7 +531,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, DataBSSSection = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, SectionKind::getBSS()); - + LazySymbolPointerSection = getContext().getMachOSection("__DATA", "__la_symbol_ptr", @@ -566,17 +563,9 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, // Exception Handling. LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); - EHFrameSection = - getContext().getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - // Debug Information. 
DwarfAbbrevSection = - getContext().getMachOSection("__DWARF", "__debug_abbrev", + getContext().getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); DwarfInfoSection = @@ -623,10 +612,19 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, getContext().getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); - + TLSExtraDataSection = TLSTLVSection; } +const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const { + return getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); +} + const MCSection *TargetLoweringObjectFileMachO:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { @@ -665,7 +663,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - + // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; if (Kind.isThreadData()) return TLSDataSection; @@ -685,7 +683,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isMergeable1ByteCString() && TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return CStringSection; - + // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. @@ -721,7 +719,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // with the .zerofill directive (aka .lcomm). if (Kind.isBSSLocal()) return DataBSSSection; - + // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -858,13 +856,6 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); - EHFrameSection = - getContext().getCOFFSection(".eh_frame", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - // Debug info. 
DwarfAbbrevSection = getContext().getCOFFSection(".debug_abbrev", @@ -928,6 +919,15 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, SectionKind::getMetadata()); } +const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { + return getContext().getCOFFSection(".eh_frame", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + + static unsigned getCOFFSectionFlags(SectionKind K) { unsigned Flags = 0; @@ -938,6 +938,7 @@ getCOFFSectionFlags(SectionKind K) { else if (K.isText()) Flags |= COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_CNT_CODE; else if (K.isBSS ()) Flags |= @@ -967,12 +968,12 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) - return ".text$linkonce"; + return ".text$"; if (Kind.isBSS ()) - return ".bss$linkonce"; + return ".bss$"; if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; + return ".data$"; + return ".rdata$"; } @@ -987,14 +988,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); - Name.append(Sym->getName().begin(), Sym->getName().end()); + Name.append(Sym->getName().begin() + 1, Sym->getName().end()); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); + COFF::IMAGE_COMDAT_SELECT_ANY, Kind); } if (Kind.isText()) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 78989c567e42..b3120b8be1ab 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -110,7 +110,7 @@ namespace { bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned RegB, unsigned Dist); + unsigned RegA, unsigned RegB, unsigned Dist); typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, @@ -138,7 +138,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(ID) {} + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -146,10 +148,7 @@ namespace { AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -159,8 +158,11 @@ namespace { } char TwoAddressInstructionPass::ID = 0; -INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction", - "Two-Address instruction pass", false, false); +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", 
false, false) char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; @@ -548,8 +550,9 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, unsigned FromRegC = getMappedReg(regC, SrcRegMap); unsigned ToRegB = getMappedReg(regB, DstRegMap); unsigned ToRegC = getMappedReg(regC, DstRegMap); - if (!regsAreCompatible(FromRegB, ToRegB, TRI) && - (regsAreCompatible(FromRegB, ToRegC, TRI) || + if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) && + ((!FromRegC && !ToRegC) || + regsAreCompatible(FromRegB, ToRegC, TRI) || regsAreCompatible(FromRegC, ToRegB, TRI))) return true; @@ -630,7 +633,8 @@ bool TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned RegB, unsigned Dist) { + unsigned RegA, unsigned RegB, + unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); @@ -650,6 +654,10 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, mi = NewMI; nmi = llvm::next(mi); } + + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); return true; } @@ -740,7 +748,7 @@ static bool isSafeToDelete(MachineInstr *MI, const TargetInstrDesc &TID = MI->getDesc(); if (TID.mayStore() || TID.isCall()) return false; - if (TID.isTerminator() || TID.hasUnmodeledSideEffects()) + if (TID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -884,7 +892,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA)) { // Try to convert it. - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { ++NumConvertedTo3Addr; return true; // Done with this instruction. } @@ -951,7 +959,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (LV) { for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() != 0 && + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { @@ -1013,8 +1021,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs; - ReMatRegs.resize(MRI->getLastVirtReg()+1); + BitVector ReMatRegs(MRI->getNumVirtRegs()); typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > TiedOperandMap; @@ -1143,7 +1150,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(regB); + ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); ++NumReMats; } else { BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), @@ -1229,13 +1236,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } // Some remat'ed instructions are dead. 
- int VReg = ReMatRegs.find_first(); - while (VReg != -1) { + for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(i); if (MRI->use_nodbg_empty(VReg)) { MachineInstr *DefMI = MRI->getVRegDef(VReg); DefMI->eraseFromParent(); } - VReg = ReMatRegs.find_next(VReg); } // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve @@ -1346,7 +1352,6 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, continue; // Insert a copy to replace the original. - MachineBasicBlock::iterator InsertLoc = SomeMI; MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, SomeMI->getDebugLoc(), TII->get(TargetOpcode::COPY)) @@ -1412,6 +1417,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); if (MI->getOperand(i).getSubReg() || TargetRegisterInfo::isPhysicalRegister(SrcReg)) { DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); @@ -1431,7 +1437,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { bool isKill = MI->getOperand(i).isKill(); if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || - !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) { + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || + !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), + MRI->getRegClass(SrcReg), SubIdx)) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add an copy if the source is live-in the block. We don't want // to end up with a partial-redef of a livein, e.g. @@ -1460,7 +1468,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineBasicBlock::iterator InsertLoc = MI; MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm()) + .addReg(DstReg, RegState::Define, SubIdx) .addReg(SrcReg, getKillRegState(isKill)); MI->getOperand(i).setReg(0); if (LV && isKill) diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 6dd333358bc4..48d8ab1658da 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,6 +26,7 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,16 +44,19 @@ namespace { virtual bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - UnreachableBlockElim() : FunctionPass(ID) {} + UnreachableBlockElim() : FunctionPass(ID) { + initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); } }; } char UnreachableBlockElim::ID = 0; INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", - "Remove unreachable blocks from the CFG", false, false); + "Remove unreachable blocks from the CFG", false, false) FunctionPass *llvm::createUnreachableBlockEliminationPass() { return new UnreachableBlockElim(); @@ -106,7 +110,7 @@ namespace { char UnreachableMachineBlockElim::ID = 0; INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", - "Remove 
unreachable machine basic blocks", false, false); + "Remove unreachable machine basic blocks", false, false) char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; @@ -118,6 +122,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet<MachineBasicBlock*, 8> Reachable; + bool ModifiedPHI = false; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); @@ -179,6 +184,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (!preds.count(phi->getOperand(i).getMBB())) { phi->RemoveOperand(i); phi->RemoveOperand(i-1); + ModifiedPHI = true; } if (phi->getNumOperands() == 3) { @@ -188,6 +194,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineInstr* temp = phi; ++phi; temp->eraseFromParent(); + ModifiedPHI = true; if (Input != Output) F.getRegInfo().replaceRegWith(Output, Input); @@ -201,5 +208,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { F.RenumberBlocks(); - return DeadBlocks.size(); + return (DeadBlocks.size() || ModifiedPHI); } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 20ffcffa70d3..734b87e62f62 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -48,7 +49,7 @@ STATISTIC(NumSpills , "Number of register spills"); char VirtRegMap::ID = 0; -INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false); +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); @@ -74,8 +75,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { EmergencySpillSlots.clear(); SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1- - TargetRegisterInfo::FirstVirtualRegister); + ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); allocatableRCRegs.clear(); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), @@ -89,24 +89,37 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { } void VirtRegMap::grow() { - unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); - Virt2PhysMap.grow(LastVirtReg); - Virt2StackSlotMap.grow(LastVirtReg); - Virt2ReMatIdMap.grow(LastVirtReg); - Virt2SplitMap.grow(LastVirtReg); - Virt2SplitKillMap.grow(LastVirtReg); - ReMatMap.grow(LastVirtReg); - ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1); + unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); + Virt2PhysMap.resize(NumRegs); + Virt2StackSlotMap.resize(NumRegs); + Virt2ReMatIdMap.resize(NumRegs); + Virt2SplitMap.resize(NumRegs); + Virt2SplitKillMap.resize(NumRegs); + ReMatMap.resize(NumRegs); + ImplicitDefed.resize(NumRegs); +} + +unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + 
assert(SS >= LowSpillSlot && "Unexpected low spill slot"); + unsigned Idx = SS-LowSpillSlot; + while (Idx >= SpillSlotToUsesMap.size()) + SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); + return SS; } unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg); unsigned physReg = Hint.second; - if (physReg && - TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) physReg = getPhys(physReg); if (Hint.first == 0) - return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg)) + return (TargetRegisterInfo::isPhysicalRegister(physReg)) ? physReg : 0; return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); } @@ -116,18 +129,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - unsigned Idx = SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); - Virt2StackSlotMap[virtReg] = SS; ++NumSpills; - return SS; + return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); } void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { @@ -160,14 +163,7 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { EmergencySpillSlots.find(RC); if (I != EmergencySpillSlots.end()) return I->second; - int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - EmergencySpillSlots[RC] = SS; - return SS; + return EmergencySpillSlots[RC] = createSpillSlot(RC); } void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { @@ -232,10 +228,11 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { UnusedRegs.resize(NumRegs); BitVector Used(NumRegs); - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) - if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[i]); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) + Used.set(Virt2PhysMap[Reg]); + } BitVector Allocatable = TRI->getAllocatableSet(*MF); bool AnyUnused = false; @@ -258,23 +255,97 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { return AnyUnused; } +void VirtRegMap::rewrite(SlotIndexes *Indexes) { + DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" + << "********** Function: " + << MF->getFunction()->getName() << '\n'); + + SmallVector<unsigned, 8> SuperKills; + + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + DEBUG(MBBI->print(dbgs(), Indexes)); + for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); + MII != MIE;) { + MachineInstr *MI = MII; + ++MII; + + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + 
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned VirtReg = MO.getReg(); + unsigned PhysReg = getPhys(VirtReg); + assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + + // Preserve semantics of sub-register operands. + if (MO.getSubReg()) { + // A virtual register kill refers to the whole register, so we may + // have to add <imp-use,kill> operands for the super-register. + if (MO.isUse() && MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + + // We don't have to deal with sub-register defs because + // LiveIntervalAnalysis already added the necessary <imp-def> + // operands. + + // PhysReg operands cannot have subregister indexes. + PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); + assert(PhysReg && "Invalid SubReg for physical register"); + MO.setSubReg(0); + } + // Rewrite. Note we could have used MachineOperand::substPhysReg(), but + // we need the inlining here. + MO.setReg(PhysReg); + } + + // Add any missing super-register kills after rewriting the whole + // instruction. + while (!SuperKills.empty()) + MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + + DEBUG(dbgs() << "> " << *MI); + + // Finally, remove any identity copies. + if (MI->isIdentityCopy()) { + DEBUG(dbgs() << "Deleting identity copy.\n"); + RemoveMachineInstrFromMaps(MI); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); + } + } + } + + // Tell MRI about physical registers in use. + for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) + if (!MRI->reg_nodbg_empty(Reg)) + MRI->setPhysRegUsed(Reg); +} + void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); const MachineRegisterInfo &MRI = MF->getRegInfo(); OS << "********** REGISTER MAP **********\n"; - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { - if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) - OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) - << "] " << MRI.getRegClass(i)->getName() << "\n"; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { + OS << '[' << PrintReg(Reg, TRI) << " -> " + << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + << MRI.getRegClass(Reg)->getName() << "\n"; + } } - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) - if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) - OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] - << "] " << MRI.getRegClass(i)->getName() << "\n"; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { + OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + << "] " << MRI.getRegClass(Reg)->getName() << "\n"; + } + } OS << '\n'; } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 8b6082d18193..ba50f4e42302 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -35,6 +35,7 @@ namespace llvm { class TargetInstrInfo; class TargetRegisterInfo; class raw_ostream; + class SlotIndexes; class VirtRegMap : public MachineFunctionPass { public: @@ -80,7 +81,7 @@ namespace llvm { /// 
Virt2SplitKillMap - This is splitted virtual register to its last use /// (kill) index mapping. - IndexedMap<SlotIndex> Virt2SplitKillMap; + IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap; /// ReMatMap - This is virtual register to re-materialized instruction /// mapping. Each virtual register whose definition is going to be @@ -134,6 +135,9 @@ namespace llvm { /// UnusedRegs - A list of physical registers that have not been used. BitVector UnusedRegs; + /// createSpillSlot - Allocate a spill slot for RC from MFI. + unsigned createSpillSlot(const TargetRegisterClass *RC); + VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT void operator=(const VirtRegMap&); // DO NOT IMPLEMENT @@ -153,10 +157,13 @@ namespace llvm { } MachineFunction &getMachineFunction() const { - assert(MF && "getMachineFunction called before runOnMAchineFunction"); + assert(MF && "getMachineFunction called before runOnMachineFunction"); return *MF; } + MachineRegisterInfo &getRegInfo() const { return *MRI; } + const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; } + void grow(); /// @brief returns true if the specified virtual register is @@ -207,10 +214,19 @@ namespace llvm { } /// @brief returns the live interval virtReg is split from. - unsigned getPreSplitReg(unsigned virtReg) { + unsigned getPreSplitReg(unsigned virtReg) const { return Virt2SplitMap[virtReg]; } + /// getOriginal - Return the original virtual register that VirtReg descends + /// from through splitting. + /// A register that was not created by splitting is its own original. + /// This operation is idempotent. + unsigned getOriginal(unsigned VirtReg) const { + unsigned Orig = getPreSplitReg(VirtReg); + return Orig ? Orig : VirtReg; + } + /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { @@ -426,12 +442,12 @@ namespace llvm { /// @brief Mark the specified register as being implicitly defined. void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister); + ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); } /// @brief Returns true if the virtual register is implicitly defined. bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister]; + return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; } /// @brief Updates information about the specified virtual register's value @@ -487,6 +503,13 @@ namespace llvm { return 0; } + /// rewrite - Rewrite all instructions in MF to use only physical registers + /// by mapping all virtual register operands to their assigned physical + /// registers. + /// + /// @param Indexes Optionally remove deleted instructions from indexes. 
+    void rewrite(SlotIndexes *Indexes);
+
     void print(raw_ostream &OS, const Module* M = 0) const;
     void dump() const;
   };
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 240d28cf3011..458a2134bf4a 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -22,8 +22,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include <algorithm>
 using namespace llvm;
 STATISTIC(NumDSE , "Number of dead stores elided");
@@ -216,7 +216,8 @@ public:
              << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
     else
       DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
-    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+          << (CanClobber ? " canclobber" : "") << "\n");
   }
   /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -297,7 +298,7 @@ ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
   const TargetLowering *TL = MF.getTarget().getTargetLowering();
   if (!TL->isTypeLegal(TL->getPointerTy()))
-    // Believe it or not, this is true on PIC16.
+    // Believe it or not, this is true on 16-bit targets like PIC16.
     return InsertLoc;
   const TargetRegisterClass *ptrRegClass =
@@ -462,25 +463,70 @@ static void findSinglePredSuccessor(MachineBasicBlock *MBB,
   }
 }
-/// InvalidateKill - Invalidate register kill information for a specific
-/// register. This also unsets the kills marker on the last kill operand.
-static void InvalidateKill(unsigned Reg,
-                           const TargetRegisterInfo* TRI,
-                           BitVector &RegKills,
-                           std::vector<MachineOperand*> &KillOps) {
-  if (RegKills[Reg]) {
-    KillOps[Reg]->setIsKill(false);
-    // KillOps[Reg] might be a def of a super-register.
-    unsigned KReg = KillOps[Reg]->getReg();
-    KillOps[KReg] = NULL;
-    RegKills.reset(KReg);
-    for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
-      if (RegKills[*SR]) {
-        KillOps[*SR]->setIsKill(false);
-        KillOps[*SR] = NULL;
-        RegKills.reset(*SR);
-      }
-    }
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and unset the kill's marker and last kill operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+  MachineOperand *KillOp = KillOps[Reg];
+  KillOp->setIsKill(false);
+  // KillOps[Reg] might be a def of a super-register.
+  unsigned KReg = KillOp->getReg();
+  if (!RegKills[KReg])
+    return;
+
+  assert(KillOps[KReg] == KillOp && "invalid superreg kill flags");
+  KillOps[KReg] = NULL;
+  RegKills.reset(KReg);
+
+  // If it's a def of a super-register, its other sub-registers are no
+  // longer killed as well.
+  for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+    DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+    assert(KillOps[*SR] == KillOp && "invalid subreg kill flags");
+    KillOps[*SR] = NULL;
+    RegKills.reset(*SR);
+  }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI.
Note that this +/// must be done even if this MI is being deleted! Consider: +/// +/// USE $r1 (vreg1) <kill> +/// ... +/// $r1(vreg3) = COPY $r1 (vreg2) +/// +/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially +/// vreg1's only use is a kill. The rewriter doesn't know it should be live +/// until it rewrites vreg2. At that points it sees that the copy is dead and +/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 +/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at +/// vreg1 before deleting the copy. +static void ResurrectKill(MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo* TRI, BitVector &RegKills, + std::vector<MachineOperand*> &KillOps) { + if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { + ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); + return; + } + // No previous kill for this reg. Check for subreg kills as well. + // d4 = + // store d4, fi#0 + // ... + // = s8<kill> + // ... + // = d4 <avoiding reload> + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + unsigned SReg = *SR; + if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) + ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); } } @@ -502,15 +548,22 @@ static void InvalidateKills(MachineInstr &MI, KillRegs->push_back(Reg); assert(Reg < KillOps.size()); if (KillOps[Reg] == &MO) { + // This operand was the kill, now no longer. KillOps[Reg] = NULL; RegKills.reset(Reg); for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { if (RegKills[*SR]) { + assert(KillOps[*SR] == &MO && "bad subreg kill flags"); KillOps[*SR] = NULL; RegKills.reset(*SR); } } } + else { + // This operand may have reused a previously killed reg. Keep it live in + // case it continues to be used after erasing this instruction. + ResurrectKill(MI, Reg, TRI, RegKills, KillOps); + } } } @@ -578,44 +631,8 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, if (Reg == 0) continue; - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - // That can't be right. Register is killed but not re-defined and it's - // being reused. Let's fix that. - KillOps[Reg]->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOps[Reg]->getReg(); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // Must be a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } else { - // Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... - // = s8<kill> - // ... - // = d4 <avoiding reload> - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) { - KillOps[SReg]->setIsKill(false); - unsigned KReg = KillOps[SReg]->getReg(); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) { - KillOps[*SSR] = NULL; - RegKills.reset(*SSR); - } - } - } - } + // This operand may have reused a previously killed reg. Keep it live. 
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps); if (MO.isKill()) { RegKills.set(Reg); @@ -770,7 +787,8 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, NotAvailable.insert(Reg); else { MBB.addLiveIn(Reg); - InvalidateKill(Reg, TRI, RegKills, KillOps); + if (RegKills[Reg]) + ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); } // Skip over the same register. @@ -1056,6 +1074,7 @@ class LocalRewriter : public VirtRegRewriter { const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; VirtRegMap *VRM; + LiveIntervals *LIs; BitVector AllocatableRegs; DenseMap<MachineInstr*, unsigned> DistanceMap; DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; @@ -1068,6 +1087,11 @@ public: LiveIntervals* LIs); private: + void EraseInstr(MachineInstr *MI) { + VRM->RemoveMachineInstrFromMaps(MI); + LIs->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } bool OptimizeByUnfold2(unsigned VirtReg, int SS, MachineBasicBlock::iterator &MII, @@ -1110,6 +1134,12 @@ private: bool InsertSpills(MachineInstr *MI); + void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + BitVector &RegKills, + ReuseInfo &ReusedOperands, + std::vector<MachineOperand*> &KillOps); + void RewriteMBB(LiveIntervals *LIs, AvailableSpills &Spills, BitVector &RegKills, std::vector<MachineOperand*> &KillOps); @@ -1117,17 +1147,18 @@ private: } bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* LIs) { + LiveIntervals* lis) { MRI = &MF.getRegInfo(); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); VRM = &vrm; + LIs = lis; AllocatableRegs = TRI->getAllocatableSet(MF); DEBUG(dbgs() << "\n**** Local spiller rewriting function '" << MF.getFunction()->getName() << "':\n"); DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" " reloads!) ****\n"); - DEBUG(MF.dump()); + DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); // Spills - Keep track of which spilled values are available in physregs // so that we can choose to reuse the physregs instead of emitting @@ -1178,7 +1209,7 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, } DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); + DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); // Mark unused spill slots. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1190,10 +1221,8 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, MFI->RemoveStackObject(SS); for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { MachineInstr *DVMI = DbgValues[j]; - MachineBasicBlock *DVMBB = DVMI->getParent(); DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - VRM->RemoveMachineInstrFromMaps(DVMI); - DVMBB->erase(DVMI); + EraseInstr(DVMI); } ++NumDSS; } @@ -1273,8 +1302,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, VRM->transferRestorePts(&MI, NewMIs[0]); MII = MBB->insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); ++NumModRefUnfold; // Unfold next instructions that fold the same SS. @@ -1289,8 +1317,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, VRM->transferRestorePts(&NextMI, NewMIs[0]); MBB->insert(NextMII, NewMIs[0]); InvalidateKills(NextMI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&NextMI); - MBB->erase(&NextMI); + EraseInstr(&NextMI); ++NumModRefUnfold; // Skip over dbg_value instructions. 
while (NextMII != MBB->end() && NextMII->isDebugValue()) @@ -1417,8 +1444,7 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); MII = FoldedMI; InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); return true; } } @@ -1524,14 +1550,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, // Delete all 3 old instructions. InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(ReloadMI); - MBB->erase(ReloadMI); + EraseInstr(ReloadMI); InvalidateKills(*DefMI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(DefMI); - MBB->erase(DefMI); + EraseInstr(DefMI); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); // If NewReg was previously holding value of some SS, it's now clobbered. // This has to be done now because it's a physical register. When this @@ -1574,8 +1597,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, bool CheckDef = PrevMII != MBB->begin(); if (CheckDef) --PrevMII; - VRM->RemoveMachineInstrFromMaps(LastStore); - MBB->erase(LastStore); + EraseInstr(LastStore); if (CheckDef) { // Look at defs of killed registers on the store. Mark the defs // as dead since the store has been deleted and they aren't @@ -1586,8 +1608,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, MachineInstr *DeadDef = PrevMII; if (ReMatDefs.count(DeadDef) && !HasOtherDef) { // FIXME: This assumes a remat def does not have side effects. - VRM->RemoveMachineInstrFromMaps(DeadDef); - MBB->erase(DeadDef); + EraseInstr(DeadDef); ++NumDRM; } } @@ -1612,10 +1633,18 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, /// effect and all of its defs are dead. static bool isSafeToDelete(MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); - if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || + if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() || TID.isCall() || TID.isBarrier() || TID.isReturn() || - TID.hasUnmodeledSideEffects()) + MI.isLabel() || MI.isDebugValue() || + MI.hasUnmodeledSideEffects()) return false; + + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI.isInlineAsm()) + return false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.getReg()) @@ -1675,8 +1704,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills, LastUD->setIsDead(); break; } - VRM->RemoveMachineInstrFromMaps(LastUDMI); - MBB->erase(LastUDMI); + EraseInstr(LastUDMI); } else { LastUD->setIsKill(); RegKills.set(Reg); @@ -1764,6 +1792,10 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, << TRI->getName(InReg) << " for vreg" << VirtReg <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); + + // Reusing a physreg may resurrect it. But we expect ProcessUses to update + // the kill flags for the current instruction after processing it. + ++NumOmitted; continue; } else if (InReg && InReg != Phys) { @@ -1828,7 +1860,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, return true; } -/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return +/// InsertSpills - Insert spills after MI if requested by VRM. Return /// true if spills were inserted. 
bool LocalRewriter::InsertSpills(MachineInstr *MI) { if (!VRM->isSpillPt(MI)) @@ -1856,6 +1888,349 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) { } +/// ProcessUses - Process all of MI's spilled operands and all available +/// operands. +void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + BitVector &RegKills, + ReuseInfo &ReusedOperands, + std::vector<MachineOperand*> &KillOps) { + // Clear kill info. + SmallSet<unsigned, 2> KilledMIRegs; + SmallVector<unsigned, 4> VirtUseOps; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; // Ignore non-register operands. + + unsigned VirtReg = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { + // Ignore physregs for spilling, but remember that it is used by this + // function. + MRI->setPhysRegUsed(VirtReg); + continue; + } + + // We want to process implicit virtual register uses first. + if (MO.isImplicit()) + // If the virtual register is implicitly defined, emit a implicit_def + // before so scavenger knows it's "defined". + // FIXME: This is a horrible hack done the by register allocator to + // remat a definition with virtual register operand. + VirtUseOps.insert(VirtUseOps.begin(), i); + else + VirtUseOps.push_back(i); + + // A partial def causes problems because the same operand both reads and + // writes the register. This rewriter is designed to rewrite uses and defs + // separately, so a partial def would already have been rewritten to a + // physreg by the time we get to processing defs. + // Add an implicit use operand to model the partial def. + if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && + MI.findRegisterUseOperandIdx(VirtReg) == -1) { + VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); + MI.addOperand(MachineOperand::CreateReg(VirtReg, + false, // isDef + true)); // isImplicit + DEBUG(dbgs() << "Partial redef: " << MI); + } + } + + // Process all of the spilled uses and all non spilled reg references. + SmallVector<int, 2> PotentialDeadStoreSlots; + KilledMIRegs.clear(); + for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { + unsigned i = VirtUseOps[j]; + unsigned VirtReg = MI.getOperand(i).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register?"); + + unsigned SubIdx = MI.getOperand(i).getSubReg(); + if (VRM->isAssignedReg(VirtReg)) { + // This virtual register was assigned a physreg! + unsigned Phys = VRM->getPhys(VirtReg); + MRI->setPhysRegUsed(Phys); + if (MI.getOperand(i).isDef()) + ReusedOperands.markClobbered(Phys); + substitutePhysReg(MI.getOperand(i), Phys, *TRI); + if (VRM->isImplicitlyDefined(VirtReg)) + // FIXME: Is this needed? + BuildMI(*MBB, &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), Phys); + continue; + } + + // This virtual register is now known to be a spilled value. + if (!MI.getOperand(i).isUse()) + continue; // Handle defs in the loop below (handle use&def here though) + + bool AvoidReload = MI.getOperand(i).isUndef(); + // Check if it is defined by an implicit def. It should not be spilled. + // Note, this is for correctness reason. e.g. + // 8 %reg1024<def> = IMPLICIT_DEF + // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 + // The live range [12, 14) are not part of the r1024 live interval since + // it's defined by an implicit def. It will not conflicts with live + // interval of r1025. 
Now suppose both registers are spilled, you can + // easily see a situation where both registers are reloaded before + // the INSERT_SUBREG and both target registers that would overlap. + bool DoReMat = VRM->isReMaterialized(VirtReg); + int SSorRMId = DoReMat + ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); + int ReuseSlot = SSorRMId; + + // Check to see if this stack slot is available. + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); + + // If this is a sub-register use, make sure the reuse register is in the + // right register class. For example, for x86 not all of the 32-bit + // registers have accessible sub-registers. + // Similarly so for EXTRACT_SUBREG. Consider this: + // EDI = op + // MOV32_mr fi#1, EDI + // ... + // = EXTRACT_SUBREG fi#1 + // fi#1 is available in EDI, but it cannot be reused because it's not in + // the right register file. + if (PhysReg && !AvoidReload && SubIdx) { + const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); + if (!RC->contains(PhysReg)) + PhysReg = 0; + } + + if (PhysReg && !AvoidReload) { + // This spilled operand might be part of a two-address operand. If this + // is the case, then changing it will necessarily require changing the + // def part of the instruction as well. However, in some cases, we + // aren't allowed to modify the reused register. If none of these cases + // apply, reuse it. + bool CanReuse = true; + bool isTied = MI.isRegTiedToDefOperand(i); + if (isTied) { + // Okay, we have a two address operand. We can reuse this physreg as + // long as we are allowed to clobber the value and there isn't an + // earlier def that has already clobbered the physreg. + CanReuse = !ReusedOperands.isClobbered(PhysReg) && + Spills.canClobberPhysReg(PhysReg); + } + // If this is an asm, and a PhysReg alias is used elsewhere as an + // earlyclobber operand, we can't also use it as an input. + if (MI.isInlineAsm()) { + for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { + MachineOperand &MOk = MI.getOperand(k); + if (MOk.isReg() && MOk.isEarlyClobber() && + TRI->regsOverlap(MOk.getReg(), PhysReg)) { + CanReuse = false; + DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg << ": " << MOk << '\n'); + break; + } + } + } + + if (CanReuse) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DEBUG(dbgs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); + else + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " + << TRI->getName(PhysReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + + // Reusing a physreg may resurrect it. But we expect ProcessUses to + // update the kill flags for the current instr after processing it. + + // The only technical detail we have is that we don't know that + // PhysReg won't be clobbered by a reloaded stack slot that occurs + // later in the instruction. In particular, consider 'op V1, V2'. + // If V1 is available in physreg R0, we would choose to reuse it + // here, instead of reloading it into the register the allocator + // indicated (say R1). However, V2 might have to be reloaded + // later, and it might indicate that it needs to live in R0. 
When + // this occurs, we need to have information available that + // indicates it is safe to use R1 for the reload instead of R0. + // + // To further complicate matters, we might conflict with an alias, + // or R0 and R1 might not be compatible with each other. In this + // case, we actually insert a reload for V1 in R1, ensuring that + // we can get at R0 or its alias. + ReusedOperands.addReuse(i, ReuseSlot, PhysReg, + VRM->getPhys(VirtReg), VirtReg); + if (isTied) + // Only mark it clobbered if this is a use&def operand. + ReusedOperands.markClobbered(PhysReg); + ++NumReused; + + if (MI.getOperand(i).isKill() && + ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { + + // The store of this spilled value is potentially dead, but we + // won't know for certain until we've confirmed that the re-use + // above is valid, which means waiting until the other operands + // are processed. For now we just track the spill slot, we'll + // remove it after the other operands are processed if valid. + + PotentialDeadStoreSlots.push_back(ReuseSlot); + } + + // Mark is isKill if it's there no other uses of the same virtual + // register and it's not a two-address operand. IsKill will be + // unset if reg is reused. + if (!isTied && KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + continue; + } // CanReuse + + // Otherwise we have a situation where we have a two-address instruction + // whose mod/ref operand needs to be reloaded. This reload is already + // available in some register "PhysReg", but if we used PhysReg as the + // operand to our 2-addr instruction, the instruction would modify + // PhysReg. This isn't cool if something later uses PhysReg and expects + // to get its initial value. + // + // To avoid this problem, and to avoid doing a load right after a store, + // we emit a copy from PhysReg into the designated register for this + // operand. + // + // This case also applies to an earlyclobber'd PhysReg. + unsigned DesignatedReg = VRM->getPhys(VirtReg); + assert(DesignatedReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + DesignatedReg = ReusedOperands. + GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, + MaybeDeadStores, RegKills, KillOps, *VRM); + + // If the mapped designated register is actually the physreg we have + // incoming, we don't need to inserted a dead copy. + if (DesignatedReg == PhysReg) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DEBUG(dbgs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); + else + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + ReusedOperands.markClobbered(RReg); + ++NumReused; + continue; + } + + MRI->setPhysRegUsed(DesignatedReg); + ReusedOperands.markClobbered(DesignatedReg); + + // Back-schedule reloads and remats. 
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, *MBB->getParent()); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), + DesignatedReg).addReg(PhysReg); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); + UpdateKills(*CopyMI, TRI, RegKills, KillOps); + + // This invalidates DesignatedReg. + Spills.ClobberPhysReg(DesignatedReg); + + Spills.addAvailable(ReuseSlot, DesignatedReg); + unsigned RReg = + SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); + ++NumReused; + continue; + } // if (PhysReg) + + // Otherwise, reload it and remember that we have it. + PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, *VRM); + + MRI->setPhysRegUsed(PhysReg); + ReusedOperands.markClobbered(PhysReg); + if (AvoidReload) + ++NumAvoided; + else { + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, *MBB->getParent()); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); + } else { + const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); + TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); + MachineInstr *LoadMI = prior(InsertLoc); + VRM->addSpillSlotUse(SSorRMId, LoadMI); + ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); + } + // This invalidates PhysReg. + Spills.ClobberPhysReg(PhysReg); + + // Any stores to this stack slot are not dead anymore. + if (!DoReMat) + MaybeDeadStores[SSorRMId] = NULL; + Spills.addAvailable(SSorRMId, PhysReg); + // Assumes this is the last use. IsKill will be unset if reg is reused + // unless it's a two-address operand. + if (!MI.isRegTiedToDefOperand(i) && + KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); + } + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + } + + // Ok - now we can remove stores that have been confirmed dead. + for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { + // This was the last use and the spilled value is still available + // for reuse. That means the spill was unnecessary! + int PDSSlot = PotentialDeadStoreSlots[j]; + MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; + if (DeadStore) { + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + EraseInstr(DeadStore); + MaybeDeadStores[PDSSlot] = NULL; + ++NumDSE; + } + } +} + /// rewriteMBB - Keep track of which spills are available even after the /// register allocator is done with them. If possible, avoid reloading vregs. 
void @@ -1880,9 +2255,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // ReMatDefs - These are rematerializable def MIs which are not deleted. SmallSet<MachineInstr*, 4> ReMatDefs; - // Clear kill info. - SmallSet<unsigned, 2> KilledMIRegs; - // Keep track of the registers we have already spilled in case there are // multiple defs of the same register in MI. SmallSet<unsigned, 8> SpilledMIRegs; @@ -1918,323 +2290,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, /// ReusedOperands - Keep track of operand reuse in case we need to undo /// reuse. ReuseInfo ReusedOperands(MI, TRI); - SmallVector<unsigned, 4> VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector<int, 2> PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! - unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. - // Note, this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. 
For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. - if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg << ": " << MOk << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. - // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. - ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. 
- ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. - GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, MF); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? 
TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(MII)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. - PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. - for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! - int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(DeadStore); - MBB->erase(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } + ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); DEBUG(dbgs() << '\t' << MI); @@ -2288,14 +2345,13 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, BackTracked = true; } else { DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // Unset last kill since it's being reused. - InvalidateKill(InReg, TRI, RegKills, KillOps); + // InvalidateKills resurrects any prior kill of the copy's source + // allowing the source reg to be reused in place of the copy. 
Spills.disallowClobberPhysReg(InReg); } InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; goto ProcessNextInst; } @@ -2306,8 +2362,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ MBB->insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; --NextMII; // backtrack to the unfolded instruction. BackTracked = true; @@ -2343,8 +2398,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, MBB->insert(MII, NewStore); VRM->addSpillSlotUse(SS, NewStore); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; --NextMII; --NextMII; // backtrack to the unfolded instruction. @@ -2359,8 +2413,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // If we get here, the store is dead, nuke it now. DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(DeadStore); - MBB->erase(DeadStore); + EraseInstr(DeadStore); if (!NewStore) ++NumDSE; } @@ -2437,8 +2490,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Last def is now dead. TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); } - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; Spills.disallowClobberPhysReg(VirtReg); goto ProcessNextInst; @@ -2514,8 +2566,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, ++NumDCE; DEBUG(dbgs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; UpdateKills(*LastStore, TRI, RegKills, KillOps); goto ProcessNextInst; @@ -2526,8 +2577,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Delete dead instructions without side effects. if (!Erased && !BackTracked && isSafeToDelete(MI)) { InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; } if (!Erased) |
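
The repeated VRM->RemoveMachineInstrFromMaps / MBB->erase pairs throughout these hunks are folded into the new EraseInstr helper, which additionally drops the instruction from LiveIntervals so its SlotIndex mapping does not go stale now that the rewriter keeps LIs as a member. A minimal free-function sketch of that call sequence (the name eraseAndUnmap is invented here for illustration; the member calls are the ones the patch itself uses):

static void eraseAndUnmap(MachineInstr *MI, VirtRegMap &VRM,
                          LiveIntervals &LIS) {
  // Both analyses key their bookkeeping off the MachineInstr, so they must
  // be updated before the instruction is deleted.
  VRM.RemoveMachineInstrFromMaps(MI);   // drop MI from VirtRegMap's per-instruction maps
  LIS.RemoveMachineInstrFromMaps(MI);   // release the instruction's SlotIndex
  MI->eraseFromParent();                // finally unlink and delete the instruction
}

The ordering matters: eraseFromParent destroys the instruction, so any map lookups keyed by it have to happen first.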
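
ProcessUses only reuses an already-available physreg when several conditions hold, and those checks are spread across the loop above, so a condensed sketch may help. This is not a helper that exists in the file: the function name and signature are invented for illustration, and it assumes the file-local ReuseInfo and AvailableSpills classes, but each individual query mirrors one made in the loop.

static bool canReuseAvailablePhysReg(MachineInstr &MI, unsigned OpIdx,
                                     unsigned VirtReg, unsigned PhysReg,
                                     unsigned SubIdx,
                                     MachineRegisterInfo *MRI,
                                     const TargetRegisterInfo *TRI,
                                     ReuseInfo &ReusedOperands,
                                     AvailableSpills &Spills) {
  // A sub-register use may only reuse PhysReg if PhysReg is in the virtual
  // register's class; e.g. not every x86 32-bit register has an accessible
  // sub-register.
  if (SubIdx && !MRI->getRegClass(VirtReg)->contains(PhysReg))
    return false;

  // A tied (two-address) operand may only reuse PhysReg if the value may be
  // clobbered and no earlier def in this instruction has clobbered it already.
  if (MI.isRegTiedToDefOperand(OpIdx) &&
      (ReusedOperands.isClobbered(PhysReg) ||
       !Spills.canClobberPhysReg(PhysReg)))
    return false;

  // For inline asm, PhysReg (or anything overlapping it) must not also be an
  // earlyclobber operand of the same asm statement.
  if (MI.isInlineAsm())
    for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
      const MachineOperand &MOk = MI.getOperand(k);
      if (MOk.isReg() && MOk.isEarlyClobber() &&
          TRI->regsOverlap(MOk.getReg(), PhysReg))
        return false;
    }
  return true;
}

In the real loop a failed check does not abandon the operand entirely: a sub-register mismatch simply clears PhysReg so a normal reload is emitted, and a tied operand that cannot reuse PhysReg falls back to copying the available value into the designated register instead.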