From 06f9d4012fb8acea3e9861d5722b5965dbb724d9 Mon Sep 17 00:00:00 2001
From: Roman Divacky <rdivacky@FreeBSD.org>
Date: Tue, 1 Dec 2009 11:07:05 +0000
Subject: Update LLVM to r90226.

---
 lib/CodeGen/AggressiveAntiDepBreaker.cpp          |  241 +-
 lib/CodeGen/AggressiveAntiDepBreaker.h            |   22 +-
 lib/CodeGen/AntiDepBreaker.h                      |   17 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp             |   13 +-
 lib/CodeGen/AsmPrinter/DIE.cpp                    |  102 +-
 lib/CodeGen/AsmPrinter/DIE.h                      |   76 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp             | 1271 +++--
 lib/CodeGen/AsmPrinter/DwarfDebug.h               |  292 +-
 lib/CodeGen/AsmPrinter/DwarfException.cpp         |   35 +-
 lib/CodeGen/AsmPrinter/DwarfWriter.cpp            |   16 +-
 lib/CodeGen/BranchFolding.cpp                     |  228 +-
 lib/CodeGen/BranchFolding.h                       |    8 -
 lib/CodeGen/CMakeLists.txt                        |    1 +
 lib/CodeGen/CriticalAntiDepBreaker.cpp            |    1 -
 lib/CodeGen/CriticalAntiDepBreaker.h              |    9 +-
 lib/CodeGen/DwarfEHPrepare.cpp                    |    2 +-
 lib/CodeGen/IfConversion.cpp                      |    2 +-
 lib/CodeGen/LLVMTargetMachine.cpp                 |   23 +-
 lib/CodeGen/LatencyPriorityQueue.cpp              |   12 -
 lib/CodeGen/LiveIntervalAnalysis.cpp              |   11 +-
 lib/CodeGen/LiveVariables.cpp                     |   96 +-
 lib/CodeGen/MachineBasicBlock.cpp                 |   62 +-
 lib/CodeGen/MachineFunction.cpp                   |   12 +-
 lib/CodeGen/MachineInstr.cpp                      |    9 +-
 lib/CodeGen/MachineLICM.cpp                       |   54 +-
 lib/CodeGen/MachineModuleInfo.cpp                 |   72 -
 lib/CodeGen/MachineVerifier.cpp                   |    2 +-
 lib/CodeGen/PHIElimination.cpp                    |   34 +-
 lib/CodeGen/PHIElimination.h                      |    6 -
 lib/CodeGen/PostRASchedulerList.cpp               |  145 +-
 lib/CodeGen/ProcessImplicitDefs.cpp               |   67 +-
 lib/CodeGen/RegAllocLinearScan.cpp                |   74 +-
 lib/CodeGen/ScheduleDAG.cpp                       |   20 +-
 lib/CodeGen/ScheduleDAGPrinter.cpp                |   10 +-
 lib/CodeGen/SelectionDAG/CMakeLists.txt           |    3 +-
 lib/CodeGen/SelectionDAG/FastISel.cpp             |    3 +-
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp |  355 ++
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h   |  151 +
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp         |    3 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp          |   48 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp        |   80 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h          |    2 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp    |   57 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  |    6 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp         |  114 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp    | 6110 ---------------------
 lib/CodeGen/SelectionDAG/SelectionDAGBuild.h      |  579 --
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  | 5821 ++++++++++++++++++++
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h    |  487 ++
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp     |  238 +-
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp  |   34 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp       |    5 -
 lib/CodeGen/SimpleRegisterCoalescing.cpp          |   18 +-
 lib/CodeGen/Spiller.cpp                           |   19 +-
 lib/CodeGen/Spiller.h                             |    3 +-
 lib/CodeGen/TailDuplication.cpp                   |  249 +
 lib/CodeGen/VirtRegRewriter.cpp                   |    2 +-
 57 files changed, 8704 insertions(+), 8728 deletions(-)
 create mode 100644 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
 delete mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
 delete mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
 create mode 100644 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
 create mode 100644 lib/CodeGen/TailDuplication.cpp

(limited to 'lib/CodeGen')

diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index c37c793b56d01..8e3f8e7704868 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -28,10 +28,15 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
 static cl::opt<int>
-AntiDepTrials("agg-antidep-trials",
-              cl::desc("Maximum number of anti-dependency breaking passes"),
-              cl::init(1), cl::Hidden);
+DebugDiv("agg-antidep-debugdiv",
+                      cl::desc("Debug control for aggressive anti-dep breaker"),
+                      cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+                      cl::desc("Debug control for aggressive anti-dep breaker"),
+                      cl::init(0), cl::Hidden);
 
 AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) :
   GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) {
@@ -108,7 +113,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
   MRI(MF.getRegInfo()),
   TRI(MF.getTarget().getRegisterInfo()),
   AllocatableSet(TRI->getAllocatableSet(MF)),
-  State(NULL), SavedState(NULL) {
+  State(NULL) {
   /* Collect a bitset of all registers that are only broken if they
      are on the critical path. */
   for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -128,13 +133,6 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
 
 AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
   delete State;
-  delete SavedState;
-}
-
-unsigned AggressiveAntiDepBreaker::GetMaxTrials() {
-  if (AntiDepTrials <= 0)
-    return 1;
-  return AntiDepTrials;
 }
 
 void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
@@ -206,8 +204,6 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
 void AggressiveAntiDepBreaker::FinishBlock() {
   delete State;
   State = NULL;
-  delete SavedState;
-  SavedState = NULL;
 }
 
 void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -241,10 +237,6 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
     }
   }
   DEBUG(errs() << '\n');
-
-  // We're starting a new schedule region so forget any saved state.
-  delete SavedState;
-  SavedState = NULL;
 }
 
 bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
@@ -283,27 +275,20 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
   }
 }
 
-/// AntiDepEdges - Return in Edges the anti- and output-
-/// dependencies on Regs in SU that we want to consider for breaking.
-static void AntiDepEdges(SUnit *SU, 
-                         const AntiDepBreaker::AntiDepRegVector& Regs,
-                         std::vector<SDep*>& Edges) {
-  AntiDepBreaker::AntiDepRegSet RegSet;
-  for (unsigned i = 0, e = Regs.size(); i < e; ++i)
-    RegSet.insert(Regs[i]);
-
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) {
+  SmallSet<unsigned, 4> RegSet;
   for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
        P != PE; ++P) {
     if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
       unsigned Reg = P->getReg();
-      if (RegSet.count(Reg) != 0) {
+      if (RegSet.count(Reg) == 0) {
         Edges.push_back(&*P);
-        RegSet.erase(Reg);
+        RegSet.insert(Reg);
       }
     }
   }
-
-  assert(RegSet.empty() && "Expected all antidep registers to be found");
 }
 
 /// CriticalPathStep - Return the next SUnit after SU on the bottom-up
@@ -332,7 +317,8 @@ static SUnit *CriticalPathStep(SUnit *SU) {
 }
 
 void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
-                                             const char *tag) {
+                                             const char *tag, const char *header,
+                                             const char *footer) {
   unsigned *KillIndices = State->GetKillIndices();
   unsigned *DefIndices = State->GetDefIndices();
   std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
@@ -343,6 +329,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
     DefIndices[Reg] = ~0u;
     RegRefs.erase(Reg);
     State->LeaveGroup(Reg);
+    DEBUG(if (header != NULL) {
+        errs() << header << TRI->getName(Reg); header = NULL; });
     DEBUG(errs() << "->g" << State->GetGroup(Reg) << tag);
   }
   // Repeat for subregisters.
@@ -354,10 +342,14 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
       DefIndices[SubregReg] = ~0u;
       RegRefs.erase(SubregReg);
       State->LeaveGroup(SubregReg);
+      DEBUG(if (header != NULL) {
+          errs() << header << TRI->getName(Reg); header = NULL; });
       DEBUG(errs() << " " << TRI->getName(SubregReg) << "->g" <<
             State->GetGroup(SubregReg) << tag);
     }
   }
+
+  DEBUG(if ((header == NULL) && (footer != NULL)) errs() << footer);
 }
 
 void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count,
@@ -377,9 +369,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
     
-    DEBUG(errs() << "\tDead Def: " << TRI->getName(Reg));
-    HandleLastUse(Reg, Count + 1, "");
-    DEBUG(errs() << '\n');
+    HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
   }
 
   DEBUG(errs() << "\tDef Groups:");
@@ -427,15 +417,17 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
     if (!MO.isReg() || !MO.isDef()) continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
-    // Ignore passthru registers for liveness...
-    if (PassthruRegs.count(Reg) != 0) continue;
+    // Ignore KILLs and passthru registers for liveness...
+    if ((MI->getOpcode() == TargetInstrInfo::KILL) ||
+        (PassthruRegs.count(Reg) != 0))
+      continue;
 
-    // Update def for Reg and subregs.
+    // Update def for Reg and aliases.
     DefIndices[Reg] = Count;
-    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
-         *Subreg; ++Subreg) {
-      unsigned SubregReg = *Subreg;
-      DefIndices[SubregReg] = Count;
+    for (const unsigned *Alias = TRI->getAliasSet(Reg);
+         *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      DefIndices[AliasReg] = Count;
     }
   }
 }
@@ -589,72 +581,108 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
       return false;
   }
 
-  // FIXME: for now just handle single register in group case...
-  if (Regs.size() > 1) {
-    DEBUG(errs() << "\tMultiple rename registers in group\n");
-    return false;
+#ifndef NDEBUG
+  // If DebugDiv > 0 then only rename when (renamecnt % DebugDiv) == DebugMod
+  if (DebugDiv > 0) {
+    static int renamecnt = 0;
+    if (renamecnt++ % DebugDiv != DebugMod)
+      return false;
+    
+    errs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+      " for debug ***\n";
   }
+#endif
 
   // Check each possible rename register for SuperReg in round-robin
   // order. If that register is available, and the corresponding
   // registers are available for the other group subregisters, then we
   // can use those registers to rename.
-  BitVector SuperBV = RenameRegisterMap[SuperReg];
   const TargetRegisterClass *SuperRC = 
     TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
   
   const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
   const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
   if (RB == RE) {
-    DEBUG(errs() << "\tEmpty Regclass!!\n");
+    DEBUG(errs() << "\tEmpty Super Regclass!!\n");
     return false;
   }
 
+  DEBUG(errs() << "\tFind Registers:");
+
   if (RenameOrder.count(SuperRC) == 0)
     RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
 
-  DEBUG(errs() << "\tFind Register:");
-
   const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
   const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
   TargetRegisterClass::iterator R = OrigR;
   do {
     if (R == RB) R = RE;
     --R;
-    const unsigned Reg = *R;
+    const unsigned NewSuperReg = *R;
     // Don't replace a register with itself.
-    if (Reg == SuperReg) continue;
-    
-    DEBUG(errs() << " " << TRI->getName(Reg));
+    if (NewSuperReg == SuperReg) continue;
     
-    // If Reg is dead and Reg's most recent def is not before
-    // SuperRegs's kill, it's safe to replace SuperReg with Reg. We
-    // must also check all subregisters of Reg.
-    if (State->IsLive(Reg) || (KillIndices[SuperReg] > DefIndices[Reg])) {
-      DEBUG(errs() << "(live)");
-      continue;
-    } else {
-      bool found = false;
-      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
-           *Subreg; ++Subreg) {
-        unsigned SubregReg = *Subreg;
-        if (State->IsLive(SubregReg) || (KillIndices[SuperReg] > DefIndices[SubregReg])) {
-          DEBUG(errs() << "(subreg " << TRI->getName(SubregReg) << " live)");
-          found = true;
-          break;
+    DEBUG(errs() << " [" << TRI->getName(NewSuperReg) << ':');
+    RenameMap.clear();
+
+    // For each referenced group register (which must be SuperReg or
+    // a subregister of SuperReg), find the corresponding subregister
+    // of NewSuperReg and make sure it is free to be renamed.
+    for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+      unsigned Reg = Regs[i];
+      unsigned NewReg = 0;
+      if (Reg == SuperReg) {
+        NewReg = NewSuperReg;
+      } else {
+        unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+        if (NewSubRegIdx != 0)
+          NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+      }
+
+      DEBUG(errs() << " " << TRI->getName(NewReg));
+      
+      // Check if Reg can be renamed to NewReg.
+      BitVector BV = RenameRegisterMap[Reg];
+      if (!BV.test(NewReg)) {
+        DEBUG(errs() << "(no rename)");
+        goto next_super_reg;
+      }
+
+      // If NewReg is dead and NewReg's most recent def is not before
+      // Reg's kill, it's safe to replace Reg with NewReg. We
+      // must also check all aliases of NewReg, because we can't define a
+      // register when any sub or super is already live.
+      if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+        DEBUG(errs() << "(live)");
+        goto next_super_reg;
+      } else {
+        bool found = false;
+        for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+             *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) {
+            DEBUG(errs() << "(alias " << TRI->getName(AliasReg) << " live)");
+            found = true;
+            break;
+          }
         }
+        if (found)
+          goto next_super_reg;
       }
-      if (found)
-        continue;
+      
+      // Record that 'Reg' can be renamed to 'NewReg'.
+      RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
     }
     
-    if (Reg != 0) { 
-      DEBUG(errs() << '\n');
-      RenameOrder.erase(SuperRC);
-      RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
-      RenameMap.insert(std::pair<unsigned, unsigned>(SuperReg, Reg));
-      return true;
-    }
+    // If we fall out here, then every register in the group can be
+    // renamed, as recorded in RenameMap.
+    RenameOrder.erase(SuperRC);
+    RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+    DEBUG(errs() << "]\n");
+    return true;
+
+  next_super_reg:
+    DEBUG(errs() << ']');
   } while (R != EndR);
 
   DEBUG(errs() << '\n');
@@ -668,7 +696,6 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
 ///
 unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
                               std::vector<SUnit>& SUnits,
-                              CandidateMap& Candidates,
                               MachineBasicBlock::iterator& Begin,
                               MachineBasicBlock::iterator& End,
                               unsigned InsertPosIndex) {
@@ -681,16 +708,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
   // so just duck out immediately if the block is empty.
   if (SUnits.empty()) return 0;
   
-  // Manage saved state to enable multiple passes...
-  if (AntiDepTrials > 1) {
-    if (SavedState == NULL) {
-      SavedState = new AggressiveAntiDepState(*State);
-    } else {
-      delete State;
-      State = new AggressiveAntiDepState(*SavedState);
-    }
-  }
-  
   // For each regclass the next register to use for renaming.
   RenameOrderType RenameOrder;
 
@@ -719,21 +736,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
     CriticalPathMI = CriticalPathSU->getInstr();
   }
 
-  // Even if there are no anti-dependencies we still need to go
-  // through the instructions to update Def, Kills, etc.
 #ifndef NDEBUG 
-  if (Candidates.empty()) {
-    DEBUG(errs() << "\n===== No anti-dependency candidates\n");
-  } else {
-    DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << 
-          " anti-dependencies\n");
-    DEBUG(errs() << "Available regs:");
-    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
-      if (!State->IsLive(Reg))
-        DEBUG(errs() << " " << TRI->getName(Reg));
-    }
-    DEBUG(errs() << '\n');
+  DEBUG(errs() << "\n===== Aggressive anti-dependency breaking\n");
+  DEBUG(errs() << "Available regs:");
+  for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+    if (!State->IsLive(Reg))
+      DEBUG(errs() << " " << TRI->getName(Reg));
   }
+  DEBUG(errs() << '\n');
 #endif
 
   // Attempt to break anti-dependence edges. Walk the instructions
@@ -754,14 +764,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
     // Process the defs in MI...
     PrescanInstruction(MI, Count, PassthruRegs);
     
-    // The the dependence edges that represent anti- and output-
+    // The dependence edges that represent anti- and output-
     // dependencies that are candidates for breaking.
     std::vector<SDep*> Edges;
     SUnit *PathSU = MISUnitMap[MI];
-    AntiDepBreaker::CandidateMap::iterator 
-      citer = Candidates.find(PathSU);
-    if (citer != Candidates.end())
-      AntiDepEdges(PathSU, citer->second, Edges);
+    AntiDepEdges(PathSU, Edges);
 
     // If MI is not on the critical path, then we don't rename
     // registers in the CriticalPathSet.
@@ -817,12 +824,32 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
           // anti-dependency since those edges would prevent such
           // units from being scheduled past each other
           // regardless.
+          //
+          // Also, if there are dependencies on other SUnits with the
+          // same register as the anti-dependency, don't attempt to
+          // break it.
+          for (SUnit::pred_iterator P = PathSU->Preds.begin(),
+                 PE = PathSU->Preds.end(); P != PE; ++P) {
+            if (P->getSUnit() == NextSU ?
+                (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+                (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+              AntiDepReg = 0;
+              break;
+            }
+          }
           for (SUnit::pred_iterator P = PathSU->Preds.begin(),
                  PE = PathSU->Preds.end(); P != PE; ++P) {
-            if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti)) {
+            if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+                (P->getKind() != SDep::Output)) {
               DEBUG(errs() << " (real dependency)\n");
               AntiDepReg = 0;
               break;
+            } else if ((P->getSUnit() != NextSU) && 
+                       (P->getKind() == SDep::Data) && 
+                       (P->getReg() == AntiDepReg)) {
+              DEBUG(errs() << " (other dependency)\n");
+              AntiDepReg = 0;
+              break;
             }
           }
           
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index e5c9a7bb3adfb..8154d2dd57256 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -27,12 +27,11 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallSet.h"
+#include <map>
 
 namespace llvm {
   /// Class AggressiveAntiDepState 
-  /// Contains all the state necessary for anti-dep breaking. We place
-  /// into a separate class so be can conveniently save/restore it to
-  /// enable multi-pass anti-dep breaking.
+  /// Contains all the state necessary for anti-dep breaking.
   class AggressiveAntiDepState {
   public:
     /// RegisterReference - Information about a register reference
@@ -126,23 +125,11 @@ namespace llvm {
     /// registers.
     AggressiveAntiDepState *State;
 
-    /// SavedState - The state for the start of an anti-dep
-    /// region. Used to restore the state at the beginning of each
-    /// pass
-    AggressiveAntiDepState *SavedState;
-
   public:
     AggressiveAntiDepBreaker(MachineFunction& MFi, 
                              TargetSubtarget::RegClassVector& CriticalPathRCs);
     ~AggressiveAntiDepBreaker();
     
-    /// GetMaxTrials - As anti-dependencies are broken, additional
-    /// dependencies may be exposed, so multiple passes are required.
-    unsigned GetMaxTrials();
-
-    /// NeedCandidates - Candidates required.
-    bool NeedCandidates() { return true; }
-
     /// Start - Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB);
 
@@ -150,7 +137,6 @@ namespace llvm {
     /// of the ScheduleDAG and break them by renaming registers.
     ///
     unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                                   CandidateMap& Candidates,
                                    MachineBasicBlock::iterator& Begin,
                                    MachineBasicBlock::iterator& End,
                                    unsigned InsertPosIndex);
@@ -175,7 +161,9 @@ namespace llvm {
     /// return that register and all subregisters.
     void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
 
-    void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag);
+    void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+                       const char *header =NULL, const char *footer =NULL);
+
     void PrescanInstruction(MachineInstr *MI, unsigned Count,
                             std::set<unsigned>& PassthruRegs);
     void ScanInstruction(MachineInstr *MI, unsigned Count);
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index b614f687a4629..3ee30c6a18e35 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -21,9 +21,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include <map>
+#include <vector>
 
 namespace llvm {
 
@@ -32,20 +30,8 @@ namespace llvm {
 /// anti-dependencies.
 class AntiDepBreaker {
 public:
-  typedef SmallSet<unsigned, 4> AntiDepRegSet;
-  typedef SmallVector<unsigned, 4> AntiDepRegVector;
-  typedef std::map<SUnit *, AntiDepRegVector> CandidateMap;
-
   virtual ~AntiDepBreaker();
 
-  /// GetMaxTrials - Return the maximum number of anti-dependence
-  /// breaking attempts that will be made for a block.
-  virtual unsigned GetMaxTrials() =0;
-
-  /// NeedCandidates - Return true if the schedule must provide
-  /// candidates with BreakAntiDependencies().
-  virtual bool NeedCandidates() =0;
-
   /// Start - Initialize anti-dep breaking for a new basic block.
   virtual void StartBlock(MachineBasicBlock *BB) =0;
 
@@ -54,7 +40,6 @@ public:
   /// the number of anti-dependencies broken.
   ///
   virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                                CandidateMap& Candidates,
                                 MachineBasicBlock::iterator& Begin,
                                 MachineBasicBlock::iterator& End,
                                 unsigned InsertPosIndex) =0;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 08e0eae16c35d..993cdbfb76c78 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -728,7 +728,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) {
 /// EmitString - Emit a string with quotes and a null terminator.
 /// Special characters are emitted properly.
 /// \literal (Eg. '\t') \endliteral
-void AsmPrinter::EmitString(const std::string &String) const {
+void AsmPrinter::EmitString(const StringRef String) const {
   EmitString(String.data(), String.size());
 }
 
@@ -1630,12 +1630,14 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
   return true;
 }
 
-MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
-  return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock());
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA,
+                                            const char *Suffix) const {
+  return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock(), Suffix);
 }
 
 MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
-                                            const BasicBlock *BB) const {
+                                            const BasicBlock *BB,
+                                            const char *Suffix) const {
   assert(BB->hasName() &&
          "Address of anonymous basic block not supported yet!");
 
@@ -1647,7 +1649,8 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
   SmallString<60> Name;
   raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
     << FuncName.size() << '_' << FuncName << '_'
-    << Mang->makeNameProper(BB->getName());
+    << Mang->makeNameProper(BB->getName())
+    << Suffix;
 
   return OutContext.GetOrCreateSymbol(Name.str());
 }
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index ecf00077fc314..0e93b9849ce52 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -105,26 +105,14 @@ DIE::~DIE() {
     delete Children[i];
 }
 
-/// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
 ///
-void DIE::AddSiblingOffset() {
+void DIE::addSiblingOffset() {
   DIEInteger *DI = new DIEInteger(0);
   Values.insert(Values.begin(), DI);
   Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIE::Profile(FoldingSetNodeID &ID) {
-  Abbrev.Profile(ID);
-
-  for (unsigned i = 0, N = Children.size(); i < N; ++i)
-    ID.AddPointer(Children[i]);
-
-  for (unsigned j = 0, M = Values.size(); j < M; ++j)
-    ID.AddPointer(Values[j]);
-}
-
 #ifndef NDEBUG
 void DIE::print(raw_ostream &O, unsigned IncIndent) {
   IndentCount += IncIndent;
@@ -231,16 +219,6 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const {
   return 0;
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) {
-  ID.AddInteger(isInteger);
-  ID.AddInteger(Int);
-}
-void DIEInteger::Profile(FoldingSetNodeID &ID) {
-  Profile(ID, Integer);
-}
-
 #ifndef NDEBUG
 void DIEInteger::print(raw_ostream &O) {
   O << "Int: " << (int64_t)Integer
@@ -258,16 +236,6 @@ void DIEString::EmitValue(Dwarf *D, unsigned Form) const {
   D->getAsm()->EmitString(Str);
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) {
-  ID.AddInteger(isString);
-  ID.AddString(Str);
-}
-void DIEString::Profile(FoldingSetNodeID &ID) {
-  Profile(ID, Str);
-}
-
 #ifndef NDEBUG
 void DIEString::print(raw_ostream &O) {
   O << "Str: \"" << Str << "\"";
@@ -292,16 +260,6 @@ unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const {
   return TD->getPointerSize();
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) {
-  ID.AddInteger(isLabel);
-  Label.Profile(ID);
-}
-void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) {
-  Profile(ID, Label);
-}
-
 #ifndef NDEBUG
 void DIEDwarfLabel::print(raw_ostream &O) {
   O << "Lbl: ";
@@ -327,16 +285,6 @@ unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const {
   return TD->getPointerSize();
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) {
-  ID.AddInteger(isAsIsLabel);
-  ID.AddString(Label);
-}
-void DIEObjectLabel::Profile(FoldingSetNodeID &ID) {
-  Profile(ID, Label.c_str());
-}
-
 #ifndef NDEBUG
 void DIEObjectLabel::print(raw_ostream &O) {
   O << "Obj: " << Label;
@@ -363,20 +311,6 @@ unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
   return TD->getPointerSize();
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label,
-                               const DWLabel &Section) {
-  ID.AddInteger(isSectionOffset);
-  Label.Profile(ID);
-  Section.Profile(ID);
-  // IsEH and UseSet are specific to the Label/Section that we will emit the
-  // offset for; so Label/Section are enough for uniqueness.
-}
-void DIESectionOffset::Profile(FoldingSetNodeID &ID) {
-  Profile(ID, Label, Section);
-}
-
 #ifndef NDEBUG
 void DIESectionOffset::print(raw_ostream &O) {
   O << "Off: ";
@@ -405,18 +339,6 @@ unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const {
   return TD->getPointerSize();
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
-                       const DWLabel &LabelLo) {
-  ID.AddInteger(isDelta);
-  LabelHi.Profile(ID);
-  LabelLo.Profile(ID);
-}
-void DIEDelta::Profile(FoldingSetNodeID &ID) {
-  Profile(ID, LabelHi, LabelLo);
-}
-
 #ifndef NDEBUG
 void DIEDelta::print(raw_ostream &O) {
   O << "Del: ";
@@ -436,21 +358,6 @@ void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const {
   D->getAsm()->EmitInt32(Entry->getOffset());
 }
 
-/// Profile - Used to gather unique data for the value folding set.
-///
-void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) {
-  ID.AddInteger(isEntry);
-  ID.AddPointer(Entry);
-}
-void DIEEntry::Profile(FoldingSetNodeID &ID) {
-  ID.AddInteger(isEntry);
-
-  if (Entry)
-    ID.AddPointer(Entry);
-  else
-    ID.AddPointer(this);
-}
-
 #ifndef NDEBUG
 void DIEEntry::print(raw_ostream &O) {
   O << format("Die: 0x%lx", (long)(intptr_t)Entry);
@@ -505,11 +412,6 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const {
   return 0;
 }
 
-void DIEBlock::Profile(FoldingSetNodeID &ID) {
-  ID.AddInteger(isBlock);
-  DIE::Profile(ID);
-}
-
 #ifndef NDEBUG
 void DIEBlock::print(raw_ostream &O) {
   O << "Blk: ";
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 3e50a15e162d3..dc6a70a6bd6a3 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -113,7 +113,7 @@ namespace llvm {
   class CompileUnit;
   class DIEValue;
 
-  class DIE : public FoldingSetNode {
+  class DIE {
   protected:
     /// Abbrev - Buffer for constructing abbreviation.
     ///
@@ -161,38 +161,28 @@ namespace llvm {
     void setSize(unsigned S) { Size = S; }
     void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; }
 
-    /// AddValue - Add a value and attributes to a DIE.
+    /// addValue - Add a value and attributes to a DIE.
     ///
-    void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+    void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
       Abbrev.AddAttribute(Attribute, Form);
       Values.push_back(Value);
     }
 
     /// SiblingOffset - Return the offset of the debug information entry's
     /// sibling.
-    unsigned SiblingOffset() const { return Offset + Size; }
+    unsigned getSiblingOffset() const { return Offset + Size; }
 
-    /// AddSiblingOffset - Add a sibling offset field to the front of the DIE.
+    /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
     ///
-    void AddSiblingOffset();
+    void addSiblingOffset();
 
-    /// AddChild - Add a child to the DIE.
+    /// addChild - Add a child to the DIE.
     ///
-    void AddChild(DIE *Child) {
+    void addChild(DIE *Child) {
       Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
       Children.push_back(Child);
     }
 
-    /// Detach - Detaches objects connected to it after copying.
-    ///
-    void Detach() {
-      Children.clear();
-    }
-
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    void Profile(FoldingSetNodeID &ID) ;
-
 #ifndef NDEBUG
     void print(raw_ostream &O, unsigned IncIndent = 0);
     void dump();
@@ -202,7 +192,7 @@ namespace llvm {
   //===--------------------------------------------------------------------===//
   /// DIEValue - A debug information entry value.
   ///
-  class DIEValue : public FoldingSetNode {
+  class DIEValue {
   public:
     enum {
       isInteger,
@@ -233,10 +223,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    virtual void Profile(FoldingSetNodeID &ID) = 0;
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *) { return true; }
 
@@ -277,10 +263,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, unsigned Int);
-    virtual void Profile(FoldingSetNodeID &ID);
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEInteger *) { return true; }
@@ -295,9 +277,9 @@ namespace llvm {
   /// DIEString - A string value DIE.
   ///
   class DIEString : public DIEValue {
-    const std::string Str;
+    const StringRef Str;
   public:
-    explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
+    explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
 
     /// EmitValue - Emit string value.
     ///
@@ -309,11 +291,6 @@ namespace llvm {
       return Str.size() + sizeof(char); // sizeof('\0');
     }
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, const std::string &Str);
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEString *) { return true; }
     static bool classof(const DIEValue *S) { return S->getType() == isString; }
@@ -339,11 +316,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, const DWLabel &Label);
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEDwarfLabel *)  { return true; }
     static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
@@ -370,11 +342,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, const std::string &Label);
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEObjectLabel *) { return true; }
     static bool classof(const DIEValue *L) {
@@ -408,12 +375,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, const DWLabel &Label,
-                        const DWLabel &Section);
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIESectionOffset *)  { return true; }
     static bool classof(const DIEValue *D) {
@@ -443,12 +404,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi,
-                        const DWLabel &LabelLo);
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEDelta *)  { return true; }
     static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
@@ -480,11 +435,6 @@ namespace llvm {
       return sizeof(int32_t);
     }
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    static void Profile(FoldingSetNodeID &ID, DIE *Entry);
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEEntry *)  { return true; }
     static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
@@ -525,10 +475,6 @@ namespace llvm {
     ///
     virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
 
-    /// Profile - Used to gather unique data for the value folding set.
-    ///
-    virtual void Profile(FoldingSetNodeID &ID);
-
     // Implement isa/cast/dyncast.
     static bool classof(const DIEBlock *)  { return true; }
     static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c62c43545c466..c2e1e0503a884 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,9 +39,7 @@ static TimerGroup &getDwarfTimerGroup() {
 
 /// Configuration values for initial hash set sizes (log2).
 ///
-static const unsigned InitDiesSetSize          = 9; // log2(512)
 static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
-static const unsigned InitValuesSetSize        = 9; // log2(512)
 
 namespace llvm {
 
@@ -55,70 +53,89 @@ class CompileUnit {
 
   /// Die - Compile unit debug information entry.
   ///
-  DIE *Die;
+  DIE *CUDie;
+
+  /// IndexTyDie - An anonymous type for index type.
+  DIE *IndexTyDie;
 
   /// GVToDieMap - Tracks the mapping of unit level debug informaton
   /// variables to debug information entries.
   /// FIXME : Rename GVToDieMap -> NodeToDieMap
-  std::map<MDNode *, DIE *> GVToDieMap;
+  ValueMap<MDNode *, DIE *> GVToDieMap;
 
   /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton
   /// descriptors to debug information entries using a DIEEntry proxy.
   /// FIXME : Rename
-  std::map<MDNode *, DIEEntry *> GVToDIEEntryMap;
+  ValueMap<MDNode *, DIEEntry *> GVToDIEEntryMap;
 
   /// Globals - A map of globally visible named entities for this unit.
   ///
   StringMap<DIE*> Globals;
 
-  /// DiesSet - Used to uniquely define dies within the compile unit.
+  /// GlobalTypes - A map of globally visible types for this unit.
   ///
-  FoldingSet<DIE> DiesSet;
+  StringMap<DIE*> GlobalTypes;
+
 public:
   CompileUnit(unsigned I, DIE *D)
-    : ID(I), Die(D), DiesSet(InitDiesSetSize) {}
-  ~CompileUnit() { delete Die; }
+    : ID(I), CUDie(D), IndexTyDie(0) {}
+  ~CompileUnit() { delete CUDie; delete IndexTyDie; }
 
   // Accessors.
-  unsigned getID() const { return ID; }
-  DIE* getDie() const { return Die; }
-  StringMap<DIE*> &getGlobals() { return Globals; }
+  unsigned getID()                  const { return ID; }
+  DIE* getCUDie()                   const { return CUDie; }
+  const StringMap<DIE*> &getGlobals()     const { return Globals; }
+  const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
 
   /// hasContent - Return true if this compile unit has something to write out.
   ///
-  bool hasContent() const { return !Die->getChildren().empty(); }
+  bool hasContent() const { return !CUDie->getChildren().empty(); }
 
-  /// AddGlobal - Add a new global entity to the compile unit.
+  /// addGlobal - Add a new global entity to the compile unit.
   ///
-  void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+  void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+
+  /// addGlobalType - Add a new global type to the compile unit.
+  ///
+  void addGlobalType(const std::string &Name, DIE *Die) { 
+    GlobalTypes[Name] = Die; 
+  }
 
-  /// getDieMapSlotFor - Returns the debug information entry map slot for the
+  /// getDIE - Returns the debug information entry for the
   /// specified debug variable.
-  DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; }
+  DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); }
 
-  /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for
-  /// the specified debug variable.
-  DIEEntry *&getDIEEntrySlotFor(MDNode *N) {
-    return GVToDIEEntryMap[N];
+  /// insertDIE - Insert DIE into the map.
+  void insertDIE(MDNode *N, DIE *D) {
+    GVToDieMap.insert(std::make_pair(N, D));
   }
 
-  /// AddDie - Adds or interns the DIE to the compile unit.
+  /// getDIEEntry - Returns the debug information entry for the specified
+  /// debug variable.
+  DIEEntry *getDIEEntry(MDNode *N) { return GVToDIEEntryMap.lookup(N); }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(MDNode *N, DIEEntry *E) {
+    GVToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  /// addDie - Adds the DIE as a child of the compile unit DIE.
   ///
-  DIE *AddDie(DIE &Buffer) {
-    FoldingSetNodeID ID;
-    Buffer.Profile(ID);
-    void *Where;
-    DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where);
-
-    if (!Die) {
-      Die = new DIE(Buffer);
-      DiesSet.InsertNode(Die, Where);
-      this->Die->AddChild(Die);
-      Buffer.Detach();
-    }
+  void addDie(DIE *Buffer) {
+    this->CUDie->addChild(Buffer);
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() {
+    return IndexTyDie;
+  }
 
-    return Die;
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) {
+    IndexTyDie = D;
   }
+
 };
 
 //===----------------------------------------------------------------------===//
@@ -147,7 +164,7 @@ public:
 ///
 class DbgScope {
   DbgScope *Parent;                   // Parent to this scope.
-  DIDescriptor Desc;                  // Debug info descriptor for scope. 
+  DIDescriptor Desc;                  // Debug info descriptor for scope.
   WeakVH InlinedAtLocation;           // Location at which scope is inlined.
   bool AbstractScope;                 // Abstract Scope
   unsigned StartLabelID;              // Label ID of the beginning of scope.
@@ -162,7 +179,7 @@ class DbgScope {
 public:
   DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
     : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
-      StartLabelID(0), EndLabelID(0), 
+      StartLabelID(0), EndLabelID(0),
       LastInsn(0), FirstInsn(0), IndentLevel(0) {}
   virtual ~DbgScope();
 
@@ -170,7 +187,7 @@ public:
   DbgScope *getParent()          const { return Parent; }
   void setParent(DbgScope *P)          { Parent = P; }
   DIDescriptor getDesc()         const { return Desc; }
-  MDNode *getInlinedAt()         const { 
+  MDNode *getInlinedAt()         const {
     return dyn_cast_or_null<MDNode>(InlinedAtLocation);
   }
   MDNode *getScopeNode()         const { return Desc.getNode(); }
@@ -187,26 +204,26 @@ public:
   bool isAbstractScope() const { return AbstractScope; }
   const MachineInstr *getFirstInsn()      { return FirstInsn; }
 
-  /// AddScope - Add a scope to the scope.
+  /// addScope - Add a scope to the scope.
   ///
-  void AddScope(DbgScope *S) { Scopes.push_back(S); }
+  void addScope(DbgScope *S) { Scopes.push_back(S); }
 
-  /// AddVariable - Add a variable to the scope.
+  /// addVariable - Add a variable to the scope.
   ///
-  void AddVariable(DbgVariable *V) { Variables.push_back(V); }
+  void addVariable(DbgVariable *V) { Variables.push_back(V); }
 
-  void FixInstructionMarkers() {
+  void fixInstructionMarkers() {
     assert (getFirstInsn() && "First instruction is missing!");
     if (getLastInsn())
       return;
-    
+
     // If a scope does not have an instruction to mark an end then use
     // the end of last child scope.
     SmallVector<DbgScope *, 4> &Scopes = getScopes();
     assert (!Scopes.empty() && "Inner most scope does not have last insn!");
     DbgScope *L = Scopes.back();
     if (!L->getLastInsn())
-      L->FixInstructionMarkers();
+      L->fixInstructionMarkers();
     setLastInsn(L->getLastInsn());
   }
 
@@ -236,21 +253,6 @@ void DbgScope::dump() const {
 }
 #endif
 
-//===----------------------------------------------------------------------===//
-/// DbgConcreteScope - This class is used to track a scope that holds concrete
-/// instance information.
-///
-class DbgConcreteScope : public DbgScope {
-  CompileUnit *Unit;
-  DIE *Die;                           // Debug info for this concrete scope.
-public:
-  DbgConcreteScope(DIDescriptor D) : DbgScope(NULL, D) {}
-
-  // Accessors.
-  DIE *getDie() const { return Die; }
-  void setDie(DIE *D) { Die = D; }
-};
-
 DbgScope::~DbgScope() {
   for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
     delete Scopes[i];
@@ -263,7 +265,7 @@ DbgScope::~DbgScope() {
 DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
   : Dwarf(OS, A, T, "dbg"), ModuleCU(0),
     AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
-    ValuesSet(InitValuesSetSize), Values(), StringPool(),
+    DIEValues(), StringPool(),
     SectionSourceLines(), didInitial(false), shouldEmit(false),
     CurrentFnDbgScope(0), DebugTimer(0) {
   if (TimePassesIsEnabled)
@@ -271,15 +273,15 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
                            getDwarfTimerGroup());
 }
 DwarfDebug::~DwarfDebug() {
-  for (unsigned j = 0, M = Values.size(); j < M; ++j)
-    delete Values[j];
+  for (unsigned j = 0, M = DIEValues.size(); j < M; ++j)
+    delete DIEValues[j];
 
   delete DebugTimer;
 }
 
-/// AssignAbbrevNumber - Define a unique number for the abbreviation.
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
 ///
-void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
   // Profile the node so that we can make it unique.
   FoldingSetNodeID ID;
   Abbrev.Profile(ID);
@@ -300,224 +302,120 @@ void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) {
   }
 }
 
-/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
 /// information entry.
-DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) {
-  DIEEntry *Value;
-
-  if (Entry) {
-    FoldingSetNodeID ID;
-    DIEEntry::Profile(ID, Entry);
-    void *Where;
-    Value = static_cast<DIEEntry *>(ValuesSet.FindNodeOrInsertPos(ID, Where));
-
-    if (Value) return Value;
-
-    Value = new DIEEntry(Entry);
-    ValuesSet.InsertNode(Value, Where);
-  } else {
-    Value = new DIEEntry(Entry);
-  }
-
-  Values.push_back(Value);
+DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
+  DIEEntry *Value = new DIEEntry(Entry);
+  DIEValues.push_back(Value);
   return Value;
 }
 
-/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
-///
-void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) {
-  Value->setEntry(Entry);
-
-  // Add to values set if not already there.  If it is, we merely have a
-  // duplicate in the values list (no harm.)
-  ValuesSet.GetOrInsertNode(Value);
-}
-
-/// AddUInt - Add an unsigned integer attribute data and value.
+/// addUInt - Add an unsigned integer attribute data and value.
 ///
-void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute,
+void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
                          unsigned Form, uint64_t Integer) {
   if (!Form) Form = DIEInteger::BestForm(false, Integer);
-
-  FoldingSetNodeID ID;
-  DIEInteger::Profile(ID, Integer);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIEInteger(Integer);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+  DIEValue *Value = new DIEInteger(Integer);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddSInt - Add an signed integer attribute data and value.
+/// addSInt - Add a signed integer attribute data and value.
 ///
-void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute,
+void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
                          unsigned Form, int64_t Integer) {
   if (!Form) Form = DIEInteger::BestForm(true, Integer);
-
-  FoldingSetNodeID ID;
-  DIEInteger::Profile(ID, (uint64_t)Integer);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIEInteger(Integer);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+  DIEValue *Value = new DIEInteger(Integer);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddString - Add a string attribute data and value.
+/// addString - Add a string attribute data and value.
 ///
-void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form,
-                           const std::string &String) {
-  FoldingSetNodeID ID;
-  DIEString::Profile(ID, String);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIEString(String);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
+                           const StringRef String) {
+  DIEValue *Value = new DIEString(String);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddLabel - Add a Dwarf label attribute data and value.
+/// addLabel - Add a Dwarf label attribute data and value.
 ///
-void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
                           const DWLabel &Label) {
-  FoldingSetNodeID ID;
-  DIEDwarfLabel::Profile(ID, Label);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIEDwarfLabel(Label);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+  DIEValue *Value = new DIEDwarfLabel(Label);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddObjectLabel - Add an non-Dwarf label attribute data and value.
+/// addObjectLabel - Add a non-Dwarf label attribute data and value.
 ///
-void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
                                 const std::string &Label) {
-  FoldingSetNodeID ID;
-  DIEObjectLabel::Profile(ID, Label);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIEObjectLabel(Label);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+  DIEValue *Value = new DIEObjectLabel(Label);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddSectionOffset - Add a section offset label attribute data and value.
+/// addSectionOffset - Add a section offset label attribute data and value.
 ///
-void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
                                   const DWLabel &Label, const DWLabel &Section,
                                   bool isEH, bool useSet) {
-  FoldingSetNodeID ID;
-  DIESectionOffset::Profile(ID, Label, Section);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIESectionOffset(Label, Section, isEH, useSet);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+  DIEValue *Value = new DIESectionOffset(Label, Section, isEH, useSet);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddDelta - Add a label delta attribute data and value.
+/// addDelta - Add a label delta attribute data and value.
 ///
-void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
                           const DWLabel &Hi, const DWLabel &Lo) {
-  FoldingSetNodeID ID;
-  DIEDelta::Profile(ID, Hi, Lo);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = new DIEDelta(Hi, Lo);
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  }
-
-  Die->AddValue(Attribute, Form, Value);
+  DIEValue *Value = new DIEDelta(Hi, Lo);
+  DIEValues.push_back(Value);
+  Die->addValue(Attribute, Form, Value);
 }
 
-/// AddBlock - Add block data.
+/// addBlock - Add block data.
 ///
-void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form,
+void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
                           DIEBlock *Block) {
   Block->ComputeSize(TD);
-  FoldingSetNodeID ID;
-  Block->Profile(ID);
-  void *Where;
-  DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where);
-
-  if (!Value) {
-    Value = Block;
-    ValuesSet.InsertNode(Value, Where);
-    Values.push_back(Value);
-  } else {
-    // Already exists, reuse the previous one.
-    delete Block;
-    Block = cast<DIEBlock>(Value);
-  }
-
-  Die->AddValue(Attribute, Block->BestForm(), Value);
+  DIEValues.push_back(Block);
+  Die->addValue(Attribute, Block->BestForm(), Block);
 }
 
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) {
+void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
   // If there is no compile unit specified, don't add a line #.
   if (V->getCompileUnit().isNull())
     return;
 
   unsigned Line = V->getLineNumber();
-  unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID();
+  unsigned FileID = findCompileUnit(V->getCompileUnit()).getID();
   assert(FileID && "Invalid file id");
-  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
 }
 
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) {
+void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
   // If there is no compile unit specified, don't add a line #.
   if (G->getCompileUnit().isNull())
     return;
 
   unsigned Line = G->getLineNumber();
-  unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID();
+  unsigned FileID = findCompileUnit(G->getCompileUnit()).getID();
   assert(FileID && "Invalid file id");
-  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
 }
 
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) {
+void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
   // If there is no compile unit specified, don't add a line #.
   if (SP->getCompileUnit().isNull())
     return;
@@ -527,25 +425,25 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) {
 
 
   unsigned Line = SP->getLineNumber();
-  unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID();
+  unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID();
   assert(FileID && "Invalid file id");
-  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
 }
 
-/// AddSourceLine - Add location information to specified debug information
+/// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
+void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
   // If there is no compile unit specified, don't add a line #.
   DICompileUnit CU = Ty->getCompileUnit();
   if (CU.isNull())
     return;
 
   unsigned Line = Ty->getLineNumber();
-  unsigned FileID = FindCompileUnit(CU).getID();
+  unsigned FileID = findCompileUnit(CU).getID();
   assert(FileID && "Invalid file id");
-  AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
-  AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
 }
 
 /* Byref variables, in Blocks, are declared by the programmer as
@@ -571,12 +469,12 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) {
    side, the Debug Information Entry for the variable VarName needs to
    have a DW_AT_location that tells the debugger how to unwind through
    the pointers and __Block_byref_x_VarName struct to find the actual
-   value of the variable.  The function AddBlockByrefType does this.  */
+   value of the variable.  The function addBlockByrefAddress does this.  */
 
 /// Find the type the programmer originally declared the variable to be
 /// and return that type.
 ///
-DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) {
+DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
 
   DIType subType = Ty;
   unsigned tag = Ty.getTag();
@@ -596,19 +494,19 @@ DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) {
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Elements.getElement(i);
     DIDerivedType DT = DIDerivedType(Element.getNode());
-    if (strcmp(Name.c_str(), DT.getName()) == 0)
+    if (Name == DT.getName())
       return (DT.getTypeDerivedFrom());
   }
 
   return Ty;
 }
 
-/// AddComplexAddress - Start with the address based on the location provided,
+/// addComplexAddress - Start with the address based on the location provided,
 /// and generate the DWARF information necessary to find the actual variable
 /// given the extra address information encoded in the DIVariable, starting from
 /// the starting location.  Add the DWARF information to the die.
 ///
-void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
+void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
                                    unsigned Attribute,
                                    const MachineLocation &Location) {
   const DIVariable &VD = DV->getVariable();
@@ -621,36 +519,36 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
 
   if (Location.isReg()) {
     if (Reg < 32) {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
     } else {
       Reg = Reg - dwarf::DW_OP_reg0;
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
   } else {
     if (Reg < 32)
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
     else {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
 
-    AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
   }
 
   for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
     uint64_t Element = VD.getAddrElement(i);
 
     if (Element == DIFactory::OpPlus) {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
     } else if (Element == DIFactory::OpDeref) {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
     } else llvm_unreachable("unknown DIFactory Opcode");
   }
 
   // Now attach the location information to the DIE.
-  AddBlock(Die, Attribute, 0, Block);
+  addBlock(Die, Attribute, 0, Block);
 }
 
 /* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -662,7 +560,7 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
    However, as far as the original *programmer* is concerned, the variable
    should still have type 'SomeType', as originally declared.
 
-   The function GetBlockByrefType dives into the __Block_byref_x_VarName
+   The function getBlockByrefType dives into the __Block_byref_x_VarName
    struct to find the original type of the variable, which is then assigned to
    the variable's Debug Information Entry as its real type.  So far, so good.
    However now the debugger will expect the variable VarName to have the type
@@ -707,13 +605,13 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die,
 
    That is what this function does.  */
 
-/// AddBlockByrefAddress - Start with the address based on the location
+/// addBlockByrefAddress - Start with the address based on the location
 /// provided, and generate the DWARF information necessary to find the
 /// actual Block variable (navigating the Block struct) based on the
 /// starting location.  Add the DWARF information to the die.  For
 /// more information, read large comment just above here.
 ///
-void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
                                       unsigned Attribute,
                                       const MachineLocation &Location) {
   const DIVariable &VD = DV->getVariable();
@@ -722,7 +620,7 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
   unsigned Tag = Ty.getTag();
   bool isPointer = false;
 
-  const char *varName = VD.getName();
+  StringRef varName = VD.getName();
 
   if (Tag == dwarf::DW_TAG_pointer_type) {
     DIDerivedType DTy = DIDerivedType(Ty.getNode());
@@ -742,10 +640,10 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
   for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Fields.getElement(i);
     DIDerivedType DT = DIDerivedType(Element.getNode());
-    const char *fieldName = DT.getName();
-    if (strcmp(fieldName, "__forwarding") == 0)
+    StringRef fieldName = DT.getName();
+    if (fieldName == "__forwarding")
       forwardingField = Element;
-    else if (strcmp(fieldName, varName) == 0)
+    else if (fieldName == varName)
       varField = Element;
   }
 
@@ -766,148 +664,144 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die,
 
   if (Location.isReg()) {
     if (Reg < 32)
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
     else {
       Reg = Reg - dwarf::DW_OP_reg0;
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
   } else {
     if (Reg < 32)
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
     else {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
 
-    AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
   }
 
   // If we started with a pointer to the __Block_byref... struct, then
   // the first thing we need to do is dereference the pointer (DW_OP_deref).
   if (isPointer)
-    AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
 
   // Next add the offset for the '__forwarding' field:
   // DW_OP_plus_uconst ForwardingFieldOffset.  Note there's no point in
   // adding the offset if it's 0.
   if (forwardingFieldOffset > 0) {
-    AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-    AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
   }
 
   // Now dereference the __forwarding field to get to the real __Block_byref
   // struct:  DW_OP_deref.
-  AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+  addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
 
   // Now that we've got the real __Block_byref... struct, add the offset
   // for the variable's field to get to the location of the actual variable:
   // DW_OP_plus_uconst varFieldOffset.  Again, don't add if it's 0.
   if (varFieldOffset > 0) {
-    AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-    AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
   }
 
   // Now attach the location information to the DIE.
-  AddBlock(Die, Attribute, 0, Block);
+  addBlock(Die, Attribute, 0, Block);
 }
 
-/// AddAddress - Add an address attribute to a die based on the location
+/// addAddress - Add an address attribute to a die based on the location
 /// provided.
-void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute,
+void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
                             const MachineLocation &Location) {
   unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
   DIEBlock *Block = new DIEBlock();
 
   if (Location.isReg()) {
     if (Reg < 32) {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
     } else {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
   } else {
     if (Reg < 32) {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
     } else {
-      AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
-      AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
     }
 
-    AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
   }
 
-  AddBlock(Die, Attribute, 0, Block);
+  addBlock(Die, Attribute, 0, Block);
 }
 
-/// AddType - Add a new type attribute to the specified entity.
-void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+/// addType - Add a new type attribute to the specified entity.
+void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
   if (Ty.isNull())
     return;
 
   // Check for pre-existence.
-  DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode());
+  DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode());
 
   // If it exists then use the existing value.
-  if (Slot) {
-    Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+  if (Entry) {
+    Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
     return;
   }
 
   // Set up proxy.
-  Slot = CreateDIEEntry();
+  Entry = createDIEEntry();
+  DW_Unit->insertDIEEntry(Ty.getNode(), Entry);
 
   // Construct type.
-  DIE Buffer(dwarf::DW_TAG_base_type);
+  DIE *Buffer = new DIE(dwarf::DW_TAG_base_type);
   if (Ty.isBasicType())
-    ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode()));
+    constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode()));
   else if (Ty.isCompositeType())
-    ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode()));
+    constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode()));
   else {
     assert(Ty.isDerivedType() && "Unknown kind of DIType");
-    ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode()));
+    constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode()));
   }
 
   // Add debug information entry to entity and appropriate context.
   DIE *Die = NULL;
   DIDescriptor Context = Ty.getContext();
   if (!Context.isNull())
-    Die = DW_Unit->getDieMapSlotFor(Context.getNode());
+    Die = DW_Unit->getDIE(Context.getNode());
 
-  if (Die) {
-    DIE *Child = new DIE(Buffer);
-    Die->AddChild(Child);
-    Buffer.Detach();
-    SetDIEEntry(Slot, Child);
-  } else {
-    Die = DW_Unit->AddDie(Buffer);
-    SetDIEEntry(Slot, Die);
-  }
-
-  Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot);
+  if (Die)
+    Die->addChild(Buffer);
+  else
+    DW_Unit->addDie(Buffer);
+  Entry->setEntry(Buffer);
+  Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
 }
 
-/// ConstructTypeDIE - Construct basic type die from DIBasicType.
-void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                                   DIBasicType BTy) {
   // Get core information.
-  const char *Name = BTy.getName();
+  StringRef Name = BTy.getName();
   Buffer.setTag(dwarf::DW_TAG_base_type);
-  AddUInt(&Buffer, dwarf::DW_AT_encoding,  dwarf::DW_FORM_data1,
+  addUInt(&Buffer, dwarf::DW_AT_encoding,  dwarf::DW_FORM_data1,
           BTy.getEncoding());
 
   // Add name if not anonymous or intermediate type.
-  if (Name)
-    AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
   uint64_t Size = BTy.getSizeInBits() >> 3;
-  AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+  addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
 }
 
-/// ConstructTypeDIE - Construct derived type die from DIDerivedType.
-void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                                   DIDerivedType DTy) {
   // Get core information.
-  const char *Name = DTy.getName();
+  StringRef Name = DTy.getName();
   uint64_t Size = DTy.getSizeInBits() >> 3;
   unsigned Tag = DTy.getTag();
 
@@ -918,26 +812,26 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
 
   // Map to main type, void will not have a type.
   DIType FromTy = DTy.getTypeDerivedFrom();
-  AddType(DW_Unit, &Buffer, FromTy);
+  addType(DW_Unit, &Buffer, FromTy);
 
   // Add name if not anonymous or intermediate type.
-  if (Name && Tag != dwarf::DW_TAG_pointer_type)
-    AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
 
   // Add size if non-zero (derived types might be zero-sized.)
   if (Size)
-    AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+    addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
 
   // Add source line info if available and TyDesc is not a forward declaration.
   if (!DTy.isForwardDecl())
-    AddSourceLine(&Buffer, &DTy);
+    addSourceLine(&Buffer, &DTy);
 }
 
-/// ConstructTypeDIE - Construct type DIE from DICompositeType.
-void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                                   DICompositeType CTy) {
   // Get core information.
-  const char *Name = CTy.getName();
+  StringRef Name = CTy.getName();
 
   uint64_t Size = CTy.getSizeInBits() >> 3;
   unsigned Tag = CTy.getTag();
@@ -946,7 +840,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
   switch (Tag) {
   case dwarf::DW_TAG_vector_type:
   case dwarf::DW_TAG_array_type:
-    ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy);
+    constructArrayTypeDIE(DW_Unit, Buffer, &CTy);
     break;
   case dwarf::DW_TAG_enumeration_type: {
     DIArray Elements = CTy.getTypeArray();
@@ -956,8 +850,8 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
       DIE *ElemDie = NULL;
       DIEnumerator Enum(Elements.getElement(i).getNode());
       if (!Enum.isNull()) {
-        ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum);
-        Buffer.AddChild(ElemDie);
+        ElemDie = constructEnumTypeDIE(DW_Unit, &Enum);
+        Buffer.addChild(ElemDie);
       }
     }
   }
@@ -966,17 +860,17 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
     // Add return type.
     DIArray Elements = CTy.getTypeArray();
     DIDescriptor RTy = Elements.getElement(0);
-    AddType(DW_Unit, &Buffer, DIType(RTy.getNode()));
+    addType(DW_Unit, &Buffer, DIType(RTy.getNode()));
 
     // Add prototype flag.
-    AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+    addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
 
     // Add arguments.
     for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
       DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
       DIDescriptor Ty = Elements.getElement(i);
-      AddType(DW_Unit, Arg, DIType(Ty.getNode()));
-      Buffer.AddChild(Arg);
+      addType(DW_Unit, Arg, DIType(Ty.getNode()));
+      Buffer.addChild(Arg);
     }
   }
     break;
@@ -997,20 +891,20 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
         continue;
       DIE *ElemDie = NULL;
       if (Element.getTag() == dwarf::DW_TAG_subprogram)
-        ElemDie = CreateSubprogramDIE(DW_Unit,
+        ElemDie = createSubprogramDIE(DW_Unit,
                                       DISubprogram(Element.getNode()));
       else
-        ElemDie = CreateMemberDIE(DW_Unit,
+        ElemDie = createMemberDIE(DW_Unit,
                                   DIDerivedType(Element.getNode()));
-      Buffer.AddChild(ElemDie);
+      Buffer.addChild(ElemDie);
     }
 
     if (CTy.isAppleBlockExtension())
-      AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
 
     unsigned RLang = CTy.getRunTimeLang();
     if (RLang)
-      AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
               dwarf::DW_FORM_data1, RLang);
     break;
   }
@@ -1019,136 +913,143 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
   }
 
   // Add name if not anonymous or intermediate type.
-  if (Name)
-    AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
 
   if (Tag == dwarf::DW_TAG_enumeration_type ||
       Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
     // Add size if non-zero (derived types might be zero-sized.)
     if (Size)
-      AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+      addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
     else {
       // Add zero size if it is not a forward declaration.
       if (CTy.isForwardDecl())
-        AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+        addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
       else
-        AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+        addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
     }
 
     // Add source line info if available.
     if (!CTy.isForwardDecl())
-      AddSourceLine(&Buffer, &CTy);
+      addSourceLine(&Buffer, &CTy);
   }
 }
 
-/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
-void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
   int64_t L = SR.getLo();
   int64_t H = SR.getHi();
   DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
 
-  AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
   if (L)
-    AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+    addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
   if (H)
-    AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+    addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
 
-  Buffer.AddChild(DW_Subrange);
+  Buffer.addChild(DW_Subrange);
 }
 
-/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                                        DICompositeType *CTy) {
   Buffer.setTag(dwarf::DW_TAG_array_type);
   if (CTy->getTag() == dwarf::DW_TAG_vector_type)
-    AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+    addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
 
   // Emit derived type.
-  AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
+  addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
   DIArray Elements = CTy->getTypeArray();
 
-  // Construct an anonymous type for index type.
-  DIE IdxBuffer(dwarf::DW_TAG_base_type);
-  AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
-  AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
-          dwarf::DW_ATE_signed);
-  DIE *IndexTy = DW_Unit->AddDie(IdxBuffer);
+  // Get an anonymous type for index type.
+  DIE *IdxTy = DW_Unit->getIndexTyDie();
+  if (!IdxTy) {
+    // Construct an anonymous type for index type.
+    IdxTy = new DIE(dwarf::DW_TAG_base_type);
+    addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+    addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+            dwarf::DW_ATE_signed);
+    DW_Unit->addDie(IdxTy);
+    DW_Unit->setIndexTyDie(IdxTy);
+  }
 
   // Add subranges to array type.
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIDescriptor Element = Elements.getElement(i);
     if (Element.getTag() == dwarf::DW_TAG_subrange_type)
-      ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy);
+      constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy);
   }
 }
 
-/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
   DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
-  const char *Name = ETy->getName();
-  AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  StringRef Name = ETy->getName();
+  addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
   int64_t Value = ETy->getEnumValue();
-  AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+  addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
   return Enumerator;
 }
 
-/// CreateGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+/// createGlobalVariableDIE - Create new DIE using GV.
+DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
                                          const DIGlobalVariable &GV) {
-  // If the global variable was optmized out then no need to create debug info entry.
+  // If the global variable was optmized out then no need to create debug info
+  // entry.
   if (!GV.getGlobal()) return NULL;
-  if (!GV.getDisplayName()) return NULL;
+  if (GV.getDisplayName().empty()) return NULL;
 
   DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
-  AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, 
+  addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
             GV.getDisplayName());
 
-  const char *LinkageName = GV.getLinkageName();
-  if (LinkageName) {
+  StringRef LinkageName = GV.getLinkageName();
+  if (!LinkageName.empty()) {
     // Skip special LLVM prefix that is used to inform the asm printer to not
     // emit usual symbol prefix before the symbol name. This happens for
     // Objective-C symbol names and symbol whose name is replaced using GCC's
     // __asm__ attribute.
     if (LinkageName[0] == 1)
-      LinkageName = &LinkageName[1];
-    AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+      LinkageName = LinkageName.substr(1);
+    addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
               LinkageName);
   }
-  AddType(DW_Unit, GVDie, GV.getType());
+  addType(DW_Unit, GVDie, GV.getType());
   if (!GV.isLocalToUnit())
-    AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-  AddSourceLine(GVDie, &GV);
+    addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+  addSourceLine(GVDie, &GV);
 
   // Add address.
   DIEBlock *Block = new DIEBlock();
-  AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
-  AddObjectLabel(Block, 0, dwarf::DW_FORM_udata,
+  addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+  addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
                  Asm->Mang->getMangledName(GV.getGlobal()));
-  AddBlock(GVDie, dwarf::DW_AT_location, 0, Block);
+  addBlock(GVDie, dwarf::DW_AT_location, 0, Block);
 
   return GVDie;
 }
 
-/// CreateMemberDIE - Create new member DIE.
-DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
+/// createMemberDIE - Create new member DIE.
+DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
   DIE *MemberDie = new DIE(DT.getTag());
-  if (const char *Name = DT.getName())
-    AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
-  AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
+  StringRef Name = DT.getName();
+  if (!Name.empty())
+    addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  
+  addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
 
-  AddSourceLine(MemberDie, &DT);
+  addSourceLine(MemberDie, &DT);
 
   DIEBlock *MemLocationDie = new DIEBlock();
-  AddUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+  addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
 
   uint64_t Size = DT.getSizeInBits();
   uint64_t FieldSize = DT.getOriginalTypeSize();
 
   if (Size != FieldSize) {
     // Handle bitfield.
-    AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
-    AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+    addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+    addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
 
     uint64_t Offset = DT.getOffsetInBits();
     uint64_t FieldOffset = Offset;
@@ -1159,49 +1060,48 @@ DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
 
     // Maybe we need to work from the other end.
     if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size);
-    AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+    addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
 
     // Here WD_AT_data_member_location points to the anonymous
     // field that includes this bit field.
-    AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
 
   } else
     // This is not a bitfield.
-    AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
 
-  AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+  addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
 
   if (DT.isProtected())
-    AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
             dwarf::DW_ACCESS_protected);
   else if (DT.isPrivate())
-    AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
             dwarf::DW_ACCESS_private);
 
   return MemberDie;
 }
 
-/// CreateSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
                                      const DISubprogram &SP,
                                      bool IsConstructor,
                                      bool IsInlined) {
   DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
+  addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
 
-  const char * Name = SP.getName();
-  AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
-  const char *LinkageName = SP.getLinkageName();
-  if (LinkageName) {
-    // Skip special LLVM prefix that is used to inform the asm printer to not emit
-    // usual symbol prefix before the symbol name. This happens for Objective-C
-    // symbol names and symbol whose name is replaced using GCC's __asm__ attribute.
+  StringRef LinkageName = SP.getLinkageName();
+  if (!LinkageName.empty()) {
+    // Skip special LLVM prefix that is used to inform the asm printer to not
+    // emit usual symbol prefix before the symbol name. This happens for
+    // Objective-C symbol names and symbol whose name is replaced using GCC's
+    // __asm__ attribute.
     if (LinkageName[0] == 1)
-      LinkageName = &LinkageName[1];
-    AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+      LinkageName = LinkageName.substr(1);
+    addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
               LinkageName);
   }
-  AddSourceLine(SPDie, &SP);
+  addSourceLine(SPDie, &SP);
 
   DICompositeType SPTy = SP.getType();
   DIArray Args = SPTy.getTypeArray();
@@ -1210,53 +1110,52 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
   unsigned Lang = SP.getCompileUnit().getLanguage();
   if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
       Lang == dwarf::DW_LANG_ObjC)
-    AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+    addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
 
   // Add Return Type.
   unsigned SPTag = SPTy.getTag();
   if (!IsConstructor) {
     if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
-      AddType(DW_Unit, SPDie, SPTy);
+      addType(DW_Unit, SPDie, SPTy);
     else
-      AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
+      addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
   }
 
   if (!SP.isDefinition()) {
-    AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
 
     // Add arguments. Do not add arguments for subprogram definition. They will
     // be handled through RecordVariable.
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
-        AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
-        SPDie->AddChild(Arg);
+        addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
+        addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+        SPDie->addChild(Arg);
       }
   }
 
   // DW_TAG_inlined_subroutine may refer to this DIE.
-  DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode());
-  Slot = SPDie;
+  DW_Unit->insertDIE(SP.getNode(), SPDie);
   return SPDie;
 }
 
-/// FindCompileUnit - Get the compile unit for the given descriptor.
+/// findCompileUnit - Get the compile unit for the given descriptor.
 ///
-CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const {
+CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const {
   DenseMap<Value *, CompileUnit *>::const_iterator I =
     CompileUnitMap.find(Unit.getNode());
   assert(I != CompileUnitMap.end() && "Missing compile unit.");
   return *I->second;
 }
 
-/// CreateDbgScopeVariable - Create a new scope variable.
+/// createDbgScopeVariable - Create a new scope variable.
 ///
-DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
+DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
   // Get the descriptor.
   const DIVariable &VD = DV->getVariable();
-  const char *Name = VD.getName();
-  if (!Name)
+  StringRef Name = VD.getName();
+  if (Name.empty())
     return NULL;
 
   // Translate tag to proper Dwarf tag.  The result variable is dropped for
@@ -1276,33 +1175,34 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
 
   // Define variable debug information entry.
   DIE *VariableDie = new DIE(Tag);
-  AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
 
   // Add source line info if available.
-  AddSourceLine(VariableDie, &VD);
+  addSourceLine(VariableDie, &VD);
 
   // Add variable type.
-  // FIXME: isBlockByrefVariable should be reformulated in terms of complex 
+  // FIXME: isBlockByrefVariable should be reformulated in terms of complex
   // addresses instead.
   if (VD.isBlockByrefVariable())
-    AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+    addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
   else
-    AddType(Unit, VariableDie, VD.getType());
+    addType(Unit, VariableDie, VD.getType());
 
   // Add variable address.
   // Variables for abstract instances of inlined functions don't get a
   // location.
   MachineLocation Location;
-  Location.set(RI->getFrameRegister(*MF),
-               RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-  
-  
+  unsigned FrameReg;
+  int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+  Location.set(FrameReg, Offset);
+
+
   if (VD.hasComplexAddress())
-    AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+    addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
   else if (VD.isBlockByrefVariable())
-    AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+    addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
   else
-    AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+    addAddress(VariableDie, dwarf::DW_AT_location, Location);
 
   return VariableDie;
 }
@@ -1329,17 +1229,17 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
   DbgScope *Parent = NULL;
   if (GetConcreteScope) {
     DILocation IL(InlinedAt);
-    Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, 
+    Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
                          IL.getOrigLocation().getNode());
     assert (Parent && "Unable to find Parent scope!");
     NScope->setParent(Parent);
-    Parent->AddScope(NScope);
+    Parent->addScope(NScope);
   } else if (DIDescriptor(N).isLexicalBlock()) {
     DILexicalBlock DB(N);
     if (!DB.getContext().isNull()) {
       Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
       NScope->setParent(Parent);
-      Parent->AddScope(NScope);
+      Parent->addScope(NScope);
     }
   }
 
@@ -1365,7 +1265,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
   DbgScope *AScope = AbstractScopes.lookup(N);
   if (AScope)
     return AScope;
-    
+
   DbgScope *Parent = NULL;
 
   DIDescriptor Scope(N);
@@ -1379,7 +1279,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
   AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
 
   if (Parent)
-    Parent->AddScope(AScope);
+    Parent->addScope(AScope);
   AScope->setAbstractScope();
   AbstractScopes[N] = AScope;
   if (DIDescriptor(N).isSubprogram())
@@ -1387,54 +1287,43 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
   return AScope;
 }
 
-static DISubprogram getDISubprogram(MDNode *N) {
-
-  DIDescriptor D(N);
-  if (D.isNull())
-    return DISubprogram();
-
-  if (D.isCompileUnit()) 
-    return DISubprogram();
-
-  if (D.isSubprogram())
-    return DISubprogram(N);
-
-  if (D.isLexicalBlock())
-    return getDISubprogram(DILexicalBlock(N).getContext().getNode());
-
-  llvm_unreachable("Unexpected Descriptor!");
-}
-
-DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) {
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
 
- DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode);
+ DIE *SPDie = ModuleCU->getDIE(SPNode);
  assert (SPDie && "Unable to find subprogram DIE!");
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
           DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
           DWLabel("func_end", SubprogramCount));
  MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
- 
+ addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
  if (!DISubprogram(SPNode).isLocalToUnit())
-   AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+   addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
 
  // If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), 
+ for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
         SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
    MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
    if (!N) continue;
    DIGlobalVariable GV(N);
    if (GV.getContext().getNode() == SPNode) {
-     DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
+     DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV);
      if (ScopedGVDie)
-       SPDie->AddChild(ScopedGVDie);
+       SPDie->addChild(ScopedGVDie);
    }
  }
+ 
  return SPDie;
 }
 
-DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
+/// constructLexicalScopeDIE - Construct a new DW_TAG_lexical_block
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
   unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
   unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
 
@@ -1446,13 +1335,13 @@ DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
   if (Scope->isAbstractScope())
     return ScopeDIE;
 
-  AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-           StartID ? 
-             DWLabel("label", StartID) 
+  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+           StartID ?
+             DWLabel("label", StartID)
            : DWLabel("func_begin", SubprogramCount));
-  AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
-           EndID ? 
-             DWLabel("label", EndID) 
+  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+           EndID ?
+             DWLabel("label", EndID)
            : DWLabel("func_end", SubprogramCount));
 
 
@@ -1460,7 +1349,10 @@ DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
   return ScopeDIE;
 }
 
-DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
+/// constructInlinedScopeDIE - This scope represents the inlined body of
+/// a function. Construct a DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
   unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
   assert (StartID && "Invalid starting label for an inlined scope!");
@@ -1475,14 +1367,14 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
   DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
 
   DISubprogram InlinedSP = getDISubprogram(DS.getNode());
-  DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+  DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
   assert (OriginDIE && "Unable to find Origin DIE!");
-  AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+  addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
               dwarf::DW_FORM_ref4, OriginDIE);
 
-  AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
            DWLabel("label", StartID));
-  AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
            DWLabel("label", EndID));
 
   InlinedSubprogramDIEs.insert(OriginDIE);
@@ -1492,7 +1384,8 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
     I = InlineInfo.find(InlinedSP.getNode());
 
   if (I == InlineInfo.end()) {
-    InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE));
+    InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID,
+                                                             ScopeDIE));
     InlinedSPNodes.push_back(InlinedSP.getNode());
   } else
     I->second.push_back(std::make_pair(StartID, ScopeDIE));
@@ -1500,18 +1393,20 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
   StringPool.insert(InlinedSP.getName());
   StringPool.insert(InlinedSP.getLinkageName());
   DILocation DL(Scope->getInlinedAt());
-  AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
-  AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+  addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+  addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
 
   return ScopeDIE;
 }
 
-DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV, 
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV,
                                       DbgScope *Scope, CompileUnit *Unit) {
   // Get the descriptor.
   const DIVariable &VD = DV->getVariable();
-  const char *Name = VD.getName();
-  if (!Name)
+  StringRef Name = VD.getName();
+  if (Name.empty())
     return NULL;
 
   // Translate tag to proper Dwarf tag.  The result variable is dropped for
@@ -1536,50 +1431,74 @@ DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV,
   DIE *AbsDIE = NULL;
   if (DbgVariable *AV = DV->getAbstractVariable())
     AbsDIE = AV->getDIE();
-  
+
   if (AbsDIE) {
     DIScope DS(Scope->getScopeNode());
     DISubprogram InlinedSP = getDISubprogram(DS.getNode());
-    DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+    DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
     (void) OriginSPDIE;
     assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
     DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
     assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
-    AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+    addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
                 dwarf::DW_FORM_ref4, AbsDIE);
   }
   else {
-    AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-    AddSourceLine(VariableDie, &VD);
+    addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+    addSourceLine(VariableDie, &VD);
 
     // Add variable type.
-    // FIXME: isBlockByrefVariable should be reformulated in terms of complex 
+    // FIXME: isBlockByrefVariable should be reformulated in terms of complex
     // addresses instead.
     if (VD.isBlockByrefVariable())
-      AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+      addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
     else
-      AddType(Unit, VariableDie, VD.getType());
+      addType(Unit, VariableDie, VD.getType());
   }
 
   // Add variable address.
   if (!Scope->isAbstractScope()) {
     MachineLocation Location;
-    Location.set(RI->getFrameRegister(*MF),
-                 RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-    
-    
+    unsigned FrameReg;
+    int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+    Location.set(FrameReg, Offset);
+
     if (VD.hasComplexAddress())
-      AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+      addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
     else if (VD.isBlockByrefVariable())
-      AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+      addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
     else
-      AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+      addAddress(VariableDie, dwarf::DW_AT_location, Location);
   }
   DV->setDIE(VariableDie);
   return VariableDie;
 
 }
-DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
+
+void DwarfDebug::addPubTypes(DISubprogram SP) {
+  DICompositeType SPTy = SP.getType();
+  unsigned SPTag = SPTy.getTag();
+  if (SPTag != dwarf::DW_TAG_subroutine_type) 
+    return;
+
+  DIArray Args = SPTy.getTypeArray();
+  if (Args.isNull()) 
+    return;
+
+  for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+    DIType ATy(Args.getElement(i).getNode());
+    if (ATy.isNull())
+      continue;
+    DICompositeType CATy = getDICompositeType(ATy);
+    if (!CATy.isNull() && !CATy.getName().empty()) {
+      if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
+        ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
+    }
+  }
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
  if (!Scope)
   return NULL;
  DIScope DS(Scope->getScopeNode());
@@ -1588,43 +1507,46 @@ DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
 
  DIE *ScopeDIE = NULL;
  if (Scope->getInlinedAt())
-   ScopeDIE = ConstructInlinedScopeDIE(Scope);
+   ScopeDIE = constructInlinedScopeDIE(Scope);
  else if (DS.isSubprogram()) {
    if (Scope->isAbstractScope())
-     ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode());
+     ScopeDIE = ModuleCU->getDIE(DS.getNode());
    else
-     ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode());
+     ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
  }
  else {
-   ScopeDIE = ConstructLexicalScopeDIE(Scope);
+   ScopeDIE = constructLexicalScopeDIE(Scope);
    if (!ScopeDIE) return NULL;
  }
 
   // Add variables to scope.
   SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
   for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
-    DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU);
-    if (VariableDIE) 
-      ScopeDIE->AddChild(VariableDIE);
+    DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU);
+    if (VariableDIE)
+      ScopeDIE->addChild(VariableDIE);
   }
 
   // Add nested scopes.
   SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
   for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
     // Define the Scope debug information entry.
-    DIE *NestedDIE = ConstructScopeDIE(Scopes[j]);
-    if (NestedDIE) 
-      ScopeDIE->AddChild(NestedDIE);
+    DIE *NestedDIE = constructScopeDIE(Scopes[j]);
+    if (NestedDIE)
+      ScopeDIE->addChild(NestedDIE);
   }
-  return ScopeDIE;
+
+  if (DS.isSubprogram()) 
+    addPubTypes(DISubprogram(DS.getNode()));
+ 
+ return ScopeDIE;
 }
 
 /// GetOrCreateSourceID - Look up the source id with the given directory and
 /// source file names. If none currently exists, create a new id and insert it
 /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
 /// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName,
-                                         const char *FileName) {
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) {
   unsigned DId;
   StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
   if (DI != DirectoryIdMap.end()) {
@@ -1657,33 +1579,34 @@ unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName,
   return SrcId;
 }
 
-void DwarfDebug::ConstructCompileUnit(MDNode *N) {
+void DwarfDebug::constructCompileUnit(MDNode *N) {
   DICompileUnit DIUnit(N);
-  const char *FN = DIUnit.getFilename();
-  const char *Dir = DIUnit.getDirectory();
+  StringRef FN = DIUnit.getFilename();
+  StringRef Dir = DIUnit.getDirectory();
   unsigned ID = GetOrCreateSourceID(Dir, FN);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+  addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
                    DWLabel("section_line", 0), DWLabel("section_line", 0),
                    false);
-  AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+  addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
             DIUnit.getProducer());
-  AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
+  addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
           DIUnit.getLanguage());
-  AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+  addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
 
-  if (Dir)
-    AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+  if (!Dir.empty())
+    addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
   if (DIUnit.isOptimized())
-    AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+    addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
 
-  if (const char *Flags = DIUnit.getFlags())
-    AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+  StringRef Flags = DIUnit.getFlags();
+  if (!Flags.empty())
+    addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
 
   unsigned RVer = DIUnit.getRunTimeVersion();
   if (RVer)
-    AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+    addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
             dwarf::DW_FORM_data1, RVer);
 
   CompileUnit *Unit = new CompileUnit(ID, Die);
@@ -1697,7 +1620,7 @@ void DwarfDebug::ConstructCompileUnit(MDNode *N) {
   CompileUnits.push_back(Unit);
 }
 
-void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) {
+void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   DIGlobalVariable DI_GV(N);
 
   // If debug information is malformed then ignore it.
@@ -1705,29 +1628,34 @@ void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) {
     return;
 
   // Check for pre-existence.
-  DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode());
-  if (Slot)
+  if (ModuleCU->getDIE(DI_GV.getNode()))
     return;
 
-  DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV);
+  DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV);
 
   // Add to map.
-  Slot = VariableDie;
+  ModuleCU->insertDIE(N, VariableDie);
 
   // Add to context owner.
-  ModuleCU->getDie()->AddChild(VariableDie);
+  ModuleCU->getCUDie()->addChild(VariableDie);
 
   // Expose as global. FIXME - need to check external flag.
-  ModuleCU->AddGlobal(DI_GV.getName(), VariableDie);
+  ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
+
+  DIType GTy = DI_GV.getType();
+  if (GTy.isCompositeType() && !GTy.getName().empty()) {
+    DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
+    assert (Entry && "Missing global type!");
+    ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
+  }
   return;
 }
 
-void DwarfDebug::ConstructSubprogram(MDNode *N) {
+void DwarfDebug::constructSubprogramDIE(MDNode *N) {
   DISubprogram SP(N);
 
   // Check for pre-existence.
-  DIE *&Slot = ModuleCU->getDieMapSlotFor(N);
-  if (Slot)
+  if (ModuleCU->getDIE(N))
     return;
 
   if (!SP.isDefinition())
@@ -1735,23 +1663,24 @@ void DwarfDebug::ConstructSubprogram(MDNode *N) {
     // class type.
     return;
 
-  DIE *SubprogramDie = CreateSubprogramDIE(ModuleCU, SP);
+  DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP);
 
   // Add to map.
-  Slot = SubprogramDie;
+  ModuleCU->insertDIE(N, SubprogramDie);
 
   // Add to context owner.
-  ModuleCU->getDie()->AddChild(SubprogramDie);
+  ModuleCU->getCUDie()->addChild(SubprogramDie);
 
   // Expose as global.
-  ModuleCU->AddGlobal(SP.getName(), SubprogramDie);
+  ModuleCU->addGlobal(SP.getName(), SubprogramDie);
+
   return;
 }
 
-/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// beginModule - Emit all Dwarf sections that should come prior to the
 /// content. Create global DIEs and emit initial debug info sections.
 /// This is inovked by the target AsmPrinter.
-void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
+void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
   this->M = M;
 
   if (TimePassesIsEnabled)
@@ -1766,7 +1695,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
   // Create all the compile unit DIEs.
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
          E = DbgFinder.compile_unit_end(); I != E; ++I)
-    ConstructCompileUnit(*I);
+    constructCompileUnit(*I);
 
   if (CompileUnits.empty()) {
     if (TimePassesIsEnabled)
@@ -1787,13 +1716,13 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
     if (GV.getContext().getNode() != GV.getCompileUnit().getNode())
       ScopedGVs.push_back(*I);
     else
-      ConstructGlobalVariableDIE(*I);
+      constructGlobalVariableDIE(*I);
   }
 
   // Create DIEs for each subprogram.
   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
          E = DbgFinder.subprogram_end(); I != E; ++I)
-    ConstructSubprogram(*I);
+    constructSubprogramDIE(*I);
 
   MMI = mmi;
   shouldEmit = true;
@@ -1819,15 +1748,15 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
   }
 
   // Emit initial sections
-  EmitInitial();
+  emitInitial();
 
   if (TimePassesIsEnabled)
     DebugTimer->stopTimer();
 }
 
-/// EndModule - Emit all Dwarf sections that should come after the content.
+/// endModule - Emit all Dwarf sections that should come after the content.
 ///
-void DwarfDebug::EndModule() {
+void DwarfDebug::endModule() {
   if (!ModuleCU)
     return;
 
@@ -1838,7 +1767,7 @@ void DwarfDebug::EndModule() {
   for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
          AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
     DIE *ISP = *AI;
-    AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+    addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
   }
 
   // Standard sections final addresses.
@@ -1854,52 +1783,56 @@ void DwarfDebug::EndModule() {
   }
 
   // Emit common frame information.
-  EmitCommonDebugFrame();
+  emitCommonDebugFrame();
 
   // Emit function debug frame information
   for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
          E = DebugFrames.end(); I != E; ++I)
-    EmitFunctionDebugFrame(*I);
+    emitFunctionDebugFrame(*I);
 
   // Compute DIE offsets and sizes.
-  SizeAndOffsets();
+  computeSizeAndOffsets();
 
   // Emit all the DIEs into a debug info section
-  EmitDebugInfo();
+  emitDebugInfo();
 
   // Corresponding abbreviations into a abbrev section.
-  EmitAbbreviations();
+  emitAbbreviations();
 
   // Emit source line correspondence into a debug line section.
-  EmitDebugLines();
+  emitDebugLines();
 
   // Emit info into a debug pubnames section.
-  EmitDebugPubNames();
+  emitDebugPubNames();
+
+  // Emit info into a debug pubtypes section.
+  emitDebugPubTypes();
 
   // Emit info into a debug str section.
-  EmitDebugStr();
+  emitDebugStr();
 
   // Emit info into a debug loc section.
-  EmitDebugLoc();
+  emitDebugLoc();
 
   // Emit info into a debug aranges section.
   EmitDebugARanges();
 
   // Emit info into a debug ranges section.
-  EmitDebugRanges();
+  emitDebugRanges();
 
   // Emit info into a debug macinfo section.
-  EmitDebugMacInfo();
+  emitDebugMacInfo();
 
   // Emit inline info.
-  EmitDebugInlineInfo();
+  emitDebugInlineInfo();
 
   if (TimePassesIsEnabled)
     DebugTimer->stopTimer();
 }
 
 /// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+                                              unsigned FrameIdx,
                                               DILocation &ScopeLoc) {
 
   DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
@@ -1911,13 +1844,13 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx
     return NULL;
 
   AbsDbgVariable = new DbgVariable(Var, FrameIdx);
-  Scope->AddVariable(AbsDbgVariable);
+  Scope->addVariable(AbsDbgVariable);
   AbstractVariables[Var.getNode()] = AbsDbgVariable;
   return AbsDbgVariable;
 }
 
-/// CollectVariableInfo - Populate DbgScope entries with variables' info.
-void DwarfDebug::CollectVariableInfo() {
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::collectVariableInfo() {
   if (!MMI) return;
 
   MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
@@ -1933,31 +1866,32 @@ void DwarfDebug::CollectVariableInfo() {
     DbgScope *Scope =
       ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
     if (!Scope)
-      Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); 
+      Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
     // If variable scope is not found then skip this variable.
     if (!Scope)
       continue;
 
     DbgVariable *RegVar = new DbgVariable(DV, VP.first);
-    Scope->AddVariable(RegVar);
-    if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc))
+    Scope->addVariable(RegVar);
+    if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first,
+                                                           ScopeLoc))
       RegVar->setAbstractVariable(AbsDbgVariable);
   }
 }
 
-/// BeginScope - Process beginning of a scope starting at Label.
-void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) {
+/// beginScope - Process beginning of a scope starting at Label.
+void DwarfDebug::beginScope(const MachineInstr *MI, unsigned Label) {
   InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
   if (I == DbgScopeBeginMap.end())
     return;
-  ScopeVector &SD = DbgScopeBeginMap[MI];
+  ScopeVector &SD = I->second;
   for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
-       SDI != SDE; ++SDI) 
+       SDI != SDE; ++SDI)
     (*SDI)->setStartLabelID(Label);
 }
 
-/// EndScope - Process end of a scope.
-void DwarfDebug::EndScope(const MachineInstr *MI) {
+/// endScope - Process end of a scope.
+void DwarfDebug::endScope(const MachineInstr *MI) {
   InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
   if (I == DbgScopeEndMap.end())
     return;
@@ -1967,7 +1901,7 @@ void DwarfDebug::EndScope(const MachineInstr *MI) {
 
   SmallVector<DbgScope *, 2> &SD = I->second;
   for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
-       SDI != SDE; ++SDI) 
+       SDI != SDE; ++SDI)
     (*SDI)->setEndLabelID(Label);
   return;
 }
@@ -1981,7 +1915,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
       return;
     WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
     DbgScopeMap.insert(std::make_pair(Scope, WScope));
-    if (DIDescriptor(Scope).isLexicalBlock()) 
+    if (DIDescriptor(Scope).isLexicalBlock())
       createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
     return;
   }
@@ -1996,9 +1930,9 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
   createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
 }
 
-/// ExtractScopeInformation - Scan machine instructions in this function
+/// extractScopeInformation - Scan machine instructions in this function
 /// and collect DbgScopes. Return true, if atleast one scope was found.
-bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
+bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
   // If scope information was extracted using .dbg intrinsics then there is not
   // any need to extract these information by scanning each instruction.
   if (!DbgScopeMap.empty())
@@ -2015,7 +1949,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
       DebugLocTuple DLT = MF->getDebugLocTuple(DL);
       if (!DLT.Scope) continue;
       // There is no need to create another DIE for compile unit. For all
-      // other scopes, create one DbgScope now. This will be translated 
+      // other scopes, create one DbgScope now. This will be translated
       // into a scope DIE at the end.
       if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
       createDbgScope(DLT.Scope, DLT.InlinedAtLoc);
@@ -2034,7 +1968,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
       DebugLocTuple DLT = MF->getDebugLocTuple(DL);
       if (!DLT.Scope)  continue;
       // There is no need to create another DIE for compile unit. For all
-      // other scopes, create one DbgScope now. This will be translated 
+      // other scopes, create one DbgScope now. This will be translated
       // into a scope DIE at the end.
       if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
       DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
@@ -2049,7 +1983,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
     if (DI->second->isAbstractScope())
       continue;
     assert (DI->second->getFirstInsn() && "Invalid first instruction!");
-    DI->second->FixInstructionMarkers();
+    DI->second->fixInstructionMarkers();
     assert (DI->second->getLastInsn() && "Invalid last instruction!");
   }
 
@@ -2083,9 +2017,9 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
   return !DbgScopeMap.empty();
 }
 
-/// BeginFunction - Gather pre-function debug information.  Assumes being
+/// beginFunction - Gather pre-function debug information.  Assumes being
 /// emitted immediately after the function entry point.
-void DwarfDebug::BeginFunction(MachineFunction *MF) {
+void DwarfDebug::beginFunction(MachineFunction *MF) {
   this->MF = MF;
 
   if (!ShouldEmitDwarfDebug()) return;
@@ -2093,9 +2027,10 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
   if (TimePassesIsEnabled)
     DebugTimer->startTimer();
 
-  if (!ExtractScopeInformation(MF))
+  if (!extractScopeInformation(MF))
     return;
-  CollectVariableInfo();
+
+  collectVariableInfo();
 
   // Begin accumulating function debug information.
   MMI->BeginFunction(MF);
@@ -2111,9 +2046,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
     unsigned LabelID = 0;
     DISubprogram SP = getDISubprogram(DLT.Scope);
     if (!SP.isNull())
-      LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope);
+      LabelID = recordSourceLine(SP.getLineNumber(), 0, DLT.Scope);
     else
-      LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
+      LabelID = recordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
     Asm->printLabel(LabelID);
     O << '\n';
   }
@@ -2121,9 +2056,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
     DebugTimer->stopTimer();
 }
 
-/// EndFunction - Gather and emit post-function debug information.
+/// endFunction - Gather and emit post-function debug information.
 ///
-void DwarfDebug::EndFunction(MachineFunction *MF) {
+void DwarfDebug::endFunction(MachineFunction *MF) {
   if (!ShouldEmitDwarfDebug()) return;
 
   if (TimePassesIsEnabled)
@@ -2148,10 +2083,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
 
   // Construct abstract scopes.
   for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
-         AE = AbstractScopesList.end(); AI != AE; ++AI) 
-    ConstructScopeDIE(*AI);
+         AE = AbstractScopesList.end(); AI != AE; ++AI)
+    constructScopeDIE(*AI);
 
-  ConstructScopeDIE(CurrentFnDbgScope);
+  constructScopeDIE(CurrentFnDbgScope);
 
   DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
                                                MMI->getFrameMoves()));
@@ -2172,10 +2107,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
     DebugTimer->stopTimer();
 }
 
-/// RecordSourceLine - Records location information and associates it with a
+/// recordSourceLine - Records location information and associates it with a
 /// label. Returns a unique label ID used to generate a label and provide
 /// correspondence to the source line list.
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, 
+unsigned DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
                                       MDNode *S) {
   if (!MMI)
     return 0;
@@ -2183,8 +2118,8 @@ unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col,
   if (TimePassesIsEnabled)
     DebugTimer->startTimer();
 
-  const char *Dir = NULL;
-  const char *Fn = NULL;
+  StringRef Dir;
+  StringRef Fn;
 
   DIDescriptor Scope(S);
   if (Scope.isCompileUnit()) {
@@ -2234,17 +2169,18 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
 // Emit Methods
 //===----------------------------------------------------------------------===//
 
-/// SizeAndOffsetDie - Compute the size and offset of a DIE.
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
 ///
-unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
   // Get the children.
   const std::vector<DIE *> &Children = Die->getChildren();
 
   // If not last sibling and has children then add sibling offset attribute.
-  if (!Last && !Children.empty()) Die->AddSiblingOffset();
+  if (!Last && !Children.empty()) Die->addSiblingOffset();
 
   // Record the abbreviation.
-  AssignAbbrevNumber(Die->getAbbrev());
+  assignAbbrevNumber(Die->getAbbrev());
 
   // Get the abbreviation for this DIE.
   unsigned AbbrevNumber = Die->getAbbrevNumber();
@@ -2270,7 +2206,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
            "Children flag not set");
 
     for (unsigned j = 0, M = Children.size(); j < M; ++j)
-      Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M);
+      Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
 
     // End of children marker.
     Offset += sizeof(int8_t);
@@ -2280,9 +2216,9 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) {
   return Offset;
 }
 
-/// SizeAndOffsets - Compute the size and offset of all the DIEs.
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
 ///
-void DwarfDebug::SizeAndOffsets() {
+void DwarfDebug::computeSizeAndOffsets() {
   // Compute size of compile unit header.
   static unsigned Offset =
     sizeof(int32_t) + // Length of Compilation Unit Info
@@ -2290,13 +2226,13 @@ void DwarfDebug::SizeAndOffsets() {
     sizeof(int32_t) + // Offset Into Abbrev. Section
     sizeof(int8_t);   // Pointer Size (in bytes)
 
-  SizeAndOffsetDie(ModuleCU->getDie(), Offset, true);
+  computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
   CompileUnitOffsets[ModuleCU] = 0;
 }
 
-/// EmitInitial - Emit initial Dwarf declarations.  This is necessary for cc
+/// emitInitial - Emit initial Dwarf declarations.  This is necessary for cc
 /// tools to recognize the object file contains Dwarf information.
-void DwarfDebug::EmitInitial() {
+void DwarfDebug::emitInitial() {
   // Check to see if we already emitted intial headers.
   if (didInitial) return;
   didInitial = true;
@@ -2327,6 +2263,8 @@ void DwarfDebug::EmitInitial() {
   EmitLabel("section_loc", 0);
   Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection());
   EmitLabel("section_pubnames", 0);
+  Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubTypesSection());
+  EmitLabel("section_pubtypes", 0);
   Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection());
   EmitLabel("section_str", 0);
   Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection());
@@ -2338,9 +2276,9 @@ void DwarfDebug::EmitInitial() {
   EmitLabel("data_begin", 0);
 }
 
-/// EmitDIE - Recusively Emits a debug information entry.
+/// emitDIE - Recursively emits a debug information entry.
 ///
-void DwarfDebug::EmitDIE(DIE *Die) {
+void DwarfDebug::emitDIE(DIE *Die) {
   // Get the abbreviation for this DIE.
   unsigned AbbrevNumber = Die->getAbbrevNumber();
   const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
@@ -2370,7 +2308,7 @@ void DwarfDebug::EmitDIE(DIE *Die) {
 
     switch (Attr) {
     case dwarf::DW_AT_sibling:
-      Asm->EmitInt32(Die->SiblingOffset());
+      Asm->EmitInt32(Die->getSiblingOffset());
       break;
     case dwarf::DW_AT_abstract_origin: {
       DIEEntry *E = cast<DIEEntry>(Values[i]);
@@ -2393,16 +2331,16 @@ void DwarfDebug::EmitDIE(DIE *Die) {
     const std::vector<DIE *> &Children = Die->getChildren();
 
     for (unsigned j = 0, M = Children.size(); j < M; ++j)
-      EmitDIE(Children[j]);
+      emitDIE(Children[j]);
 
     Asm->EmitInt8(0); Asm->EOL("End Of Children Mark");
   }
 }
 
-/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
 ///
-void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
-  DIE *Die = Unit->getDie();
+void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) {
+  DIE *Die = Unit->getCUDie();
 
   // Emit the compile units header.
   EmitLabel("info_begin", Unit->getID());
@@ -2420,7 +2358,7 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
   Asm->EOL("Offset Into Abbrev. Section");
   Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
 
-  EmitDIE(Die);
+  emitDIE(Die);
   // FIXME - extra padding for gdb bug.
   Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
   Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
@@ -2431,17 +2369,17 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) {
   Asm->EOL();
 }
 
-void DwarfDebug::EmitDebugInfo() {
+void DwarfDebug::emitDebugInfo() {
   // Start debug info section.
   Asm->OutStreamer.SwitchSection(
                             Asm->getObjFileLowering().getDwarfInfoSection());
 
-  EmitDebugInfoPerCU(ModuleCU);
+  emitDebugInfoPerCU(ModuleCU);
 }
 
-/// EmitAbbreviations - Emit the abbreviation section.
+/// emitAbbreviations - Emit the abbreviation section.
 ///
-void DwarfDebug::EmitAbbreviations() const {
+void DwarfDebug::emitAbbreviations() const {
   // Check to see if it is worth the effort.
   if (!Abbreviations.empty()) {
     // Start the debug abbrev section.
@@ -2473,10 +2411,10 @@ void DwarfDebug::EmitAbbreviations() const {
   }
 }
 
-/// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
 /// the line matrix.
 ///
-void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   // Define last address of section.
   Asm->EmitInt8(0); Asm->EOL("Extended Op");
   Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size");
@@ -2489,9 +2427,9 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) {
   Asm->EmitInt8(1); Asm->EOL();
 }
 
-/// EmitDebugLines - Emit source line information.
+/// emitDebugLines - Emit source line information.
 ///
-void DwarfDebug::EmitDebugLines() {
+void DwarfDebug::emitDebugLines() {
   // If the target is using .loc/.file, the assembler will be emitting the
   // .debug_line table automatically.
   if (MAI->hasDotLocAndDotFile())
@@ -2640,22 +2578,22 @@ void DwarfDebug::EmitDebugLines() {
       }
     }
 
-    EmitEndOfLineMatrix(j + 1);
+    emitEndOfLineMatrix(j + 1);
   }
 
   if (SecSrcLinesSize == 0)
     // Because we're emitting a debug_line section, we still need a line
     // table. The linker and friends expect it to exist. If there's nothing to
     // put into it, emit an empty table.
-    EmitEndOfLineMatrix(1);
+    emitEndOfLineMatrix(1);
 
   EmitLabel("line_end", 0);
   Asm->EOL();
 }
 
-/// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
 ///
-void DwarfDebug::EmitCommonDebugFrame() {
+void DwarfDebug::emitCommonDebugFrame() {
   if (!MAI->doesDwarfRequireFrameSection())
     return;
 
@@ -2698,10 +2636,10 @@ void DwarfDebug::EmitCommonDebugFrame() {
   Asm->EOL();
 }
 
-/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+/// emitFunctionDebugFrame - Emit per function frame info into a debug frame
 /// section.
 void
-DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
+DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
   if (!MAI->doesDwarfRequireFrameSection())
     return;
 
@@ -2734,7 +2672,7 @@ DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
   Asm->EOL();
 }
 
-void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
+void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) {
   EmitDifference("pubnames_end", Unit->getID(),
                  "pubnames_begin", Unit->getID(), true);
   Asm->EOL("Length of Public Names Info");
@@ -2751,7 +2689,7 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
                  true);
   Asm->EOL("Compilation Unit Length");
 
-  StringMap<DIE*> &Globals = Unit->getGlobals();
+  const StringMap<DIE*> &Globals = Unit->getGlobals();
   for (StringMap<DIE*>::const_iterator
          GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
     const char *Name = GI->getKeyData();
@@ -2767,19 +2705,55 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) {
   Asm->EOL();
 }
 
-/// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
 ///
-void DwarfDebug::EmitDebugPubNames() {
+void DwarfDebug::emitDebugPubNames() {
   // Start the dwarf pubnames section.
   Asm->OutStreamer.SwitchSection(
                           Asm->getObjFileLowering().getDwarfPubNamesSection());
 
-  EmitDebugPubNamesPerCU(ModuleCU);
+  emitDebugPubNamesPerCU(ModuleCU);
+}
+
+void DwarfDebug::emitDebugPubTypes() {
+  // Start the dwarf pubtypes section.
+  Asm->OutStreamer.SwitchSection(
+                          Asm->getObjFileLowering().getDwarfPubTypesSection());
+  EmitDifference("pubtypes_end", ModuleCU->getID(),
+                 "pubtypes_begin", ModuleCU->getID(), true);
+  Asm->EOL("Length of Public Types Info");
+
+  EmitLabel("pubtypes_begin", ModuleCU->getID());
+
+  Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
+
+  EmitSectionOffset("info_begin", "section_info",
+                    ModuleCU->getID(), 0, true, false);
+  Asm->EOL("Offset of Compilation ModuleCU Info");
+
+  EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
+                 true);
+  Asm->EOL("Compilation ModuleCU Length");
+
+  const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
+  for (StringMap<DIE*>::const_iterator
+         GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+    const char *Name = GI->getKeyData();
+    DIE * Entity = GI->second;
+
+    Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset");
+    Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name");
+  }
+
+  Asm->EmitInt32(0); Asm->EOL("End Mark");
+  EmitLabel("pubtypes_end", ModuleCU->getID());
+
+  Asm->EOL();
 }
 
-/// EmitDebugStr - Emit visible names into a debug str section.
+/// emitDebugStr - Emit visible names into a debug str section.
 ///
-void DwarfDebug::EmitDebugStr() {
+void DwarfDebug::emitDebugStr() {
   // Check to see if it is worth the effort.
   if (!StringPool.empty()) {
     // Start the dwarf str section.
@@ -2801,9 +2775,9 @@ void DwarfDebug::EmitDebugStr() {
   }
 }
 
-/// EmitDebugLoc - Emit visible names into a debug loc section.
+/// emitDebugLoc - Emit visible names into a debug loc section.
 ///
-void DwarfDebug::EmitDebugLoc() {
+void DwarfDebug::emitDebugLoc() {
   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
                               Asm->getObjFileLowering().getDwarfLocSection());
@@ -2847,18 +2821,18 @@ void DwarfDebug::EmitDebugARanges() {
   Asm->EOL();
 }
 
-/// EmitDebugRanges - Emit visible names into a debug ranges section.
+/// emitDebugRanges - Emit visible names into a debug ranges section.
 ///
-void DwarfDebug::EmitDebugRanges() {
+void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
   Asm->OutStreamer.SwitchSection(
                             Asm->getObjFileLowering().getDwarfRangesSection());
   Asm->EOL();
 }
 
-/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
 ///
-void DwarfDebug::EmitDebugMacInfo() {
+void DwarfDebug::emitDebugMacInfo() {
   if (const MCSection *LineInfo =
       Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
     // Start the dwarf macinfo section.
@@ -2867,7 +2841,7 @@ void DwarfDebug::EmitDebugMacInfo() {
   }
 }
 
-/// EmitDebugInlineInfo - Emit inline info using following format.
+/// emitDebugInlineInfo - Emit inline info using following format.
 /// Section Header:
 /// 1. length of section
 /// 2. Dwarf version number
@@ -2885,7 +2859,7 @@ void DwarfDebug::EmitDebugMacInfo() {
 /// inlined instance; the die_offset points to the inlined_subroutine die in the
 /// __debug_info section, and the low_pc is the starting address for the
 /// inlining instance.
-void DwarfDebug::EmitDebugInlineInfo() {
+void DwarfDebug::emitDebugInlineInfo() {
   if (!MAI->doesDwarfUsesInlineInfoSection())
     return;
 
@@ -2906,17 +2880,18 @@ void DwarfDebug::EmitDebugInlineInfo() {
 
   for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
-    
+
 //  for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
     //        I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
     MDNode *Node = *I;
-    ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node);
+    ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+      = InlineInfo.find(Node);
     SmallVector<InlineInfoLabels, 4> &Labels = II->second;
     DISubprogram SP(Node);
-    const char *LName = SP.getLinkageName();
-    const char *Name = SP.getName();
+    StringRef LName = SP.getLinkageName();
+    StringRef Name = SP.getName();
 
-    if (!LName)
+    if (LName.empty())
       Asm->EmitString(Name);
     else {
       // Skip special LLVM prefix that is used to inform the asm printer to not
@@ -2924,14 +2899,14 @@ void DwarfDebug::EmitDebugInlineInfo() {
       // Objective-C symbol names and symbol whose name is replaced using GCC's
       // __asm__ attribute.
       if (LName[0] == 1)
-        LName = &LName[1];
+        LName = LName.substr(1);
 //      Asm->EmitString(LName);
       EmitSectionOffset("string", "section_str",
                         StringPool.idFor(LName), false, true);
 
     }
     Asm->EOL("MIPS linkage name");
-//    Asm->EmitString(Name); 
+//    Asm->EmitString(Name);
     EmitSectionOffset("string", "section_str",
                       StringPool.idFor(Name), false, true);
     Asm->EOL("Function name");
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 646de8f36e145..679d9b9d1a0bf 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -106,13 +106,9 @@ class DwarfDebug : public Dwarf {
   /// Lines - List of of source line correspondence.
   std::vector<SrcLineInfo> Lines;
 
-  /// ValuesSet - Used to uniquely define values.
+  /// DIEValues - A list of all the unique values in use.
   ///
-  FoldingSet<DIEValue> ValuesSet;
-
-  /// Values - A list of all the unique values in use.
-  ///
-  std::vector<DIEValue *> Values;
+  std::vector<DIEValue *> DIEValues;
 
   /// StringPool - A UniqueVector of strings used by indirect references.
   ///
@@ -229,137 +225,135 @@ class DwarfDebug : public Dwarf {
     return SourceIds.size();
   }
 
-  /// AssignAbbrevNumber - Define a unique number for the abbreviation.
+  /// assignAbbrevNumber - Define a unique number for the abbreviation.
   ///
-  void AssignAbbrevNumber(DIEAbbrev &Abbrev);
+  void assignAbbrevNumber(DIEAbbrev &Abbrev);
 
-  /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
   /// information entry.
-  DIEEntry *CreateDIEEntry(DIE *Entry = NULL);
-
-  /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined.
-  ///
-  void SetDIEEntry(DIEEntry *Value, DIE *Entry);
+  DIEEntry *createDIEEntry(DIE *Entry = NULL);
 
-  /// AddUInt - Add an unsigned integer attribute data and value.
+  /// addUInt - Add an unsigned integer attribute data and value.
   ///
-  void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+  void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
 
-  /// AddSInt - Add an signed integer attribute data and value.
+  /// addSInt - Add a signed integer attribute data and value.
   ///
-  void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+  void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
 
-  /// AddString - Add a string attribute data and value.
+  /// addString - Add a string attribute data and value.
   ///
-  void AddString(DIE *Die, unsigned Attribute, unsigned Form,
-                 const std::string &String);
+  void addString(DIE *Die, unsigned Attribute, unsigned Form,
+                 const StringRef Str);
 
-  /// AddLabel - Add a Dwarf label attribute data and value.
+  /// addLabel - Add a Dwarf label attribute data and value.
   ///
-  void AddLabel(DIE *Die, unsigned Attribute, unsigned Form,
+  void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
                 const DWLabel &Label);
 
-  /// AddObjectLabel - Add an non-Dwarf label attribute data and value.
+  /// addObjectLabel - Add a non-Dwarf label attribute data and value.
   ///
-  void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
+  void addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form,
                       const std::string &Label);
 
-  /// AddSectionOffset - Add a section offset label attribute data and value.
+  /// addSectionOffset - Add a section offset label attribute data and value.
   ///
-  void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
+  void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
                         const DWLabel &Label, const DWLabel &Section,
                         bool isEH = false, bool useSet = true);
 
-  /// AddDelta - Add a label delta attribute data and value.
+  /// addDelta - Add a label delta attribute data and value.
   ///
-  void AddDelta(DIE *Die, unsigned Attribute, unsigned Form,
+  void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
                 const DWLabel &Hi, const DWLabel &Lo);
 
-  /// AddDIEEntry - Add a DIE attribute data and value.
+  /// addDIEEntry - Add a DIE attribute data and value.
   ///
-  void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
-    Die->AddValue(Attribute, Form, CreateDIEEntry(Entry));
+  void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) {
+    Die->addValue(Attribute, Form, createDIEEntry(Entry));
   }
 
-  /// AddBlock - Add block data.
+  /// addBlock - Add block data.
   ///
-  void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+  void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
 
-  /// AddSourceLine - Add location information to specified debug information
+  /// addSourceLine - Add location information to specified debug information
   /// entry.
-  void AddSourceLine(DIE *Die, const DIVariable *V);
-  void AddSourceLine(DIE *Die, const DIGlobal *G);
-  void AddSourceLine(DIE *Die, const DISubprogram *SP);
-  void AddSourceLine(DIE *Die, const DIType *Ty);
+  void addSourceLine(DIE *Die, const DIVariable *V);
+  void addSourceLine(DIE *Die, const DIGlobal *G);
+  void addSourceLine(DIE *Die, const DISubprogram *SP);
+  void addSourceLine(DIE *Die, const DIType *Ty);
 
-  /// AddAddress - Add an address attribute to a die based on the location
+  /// addAddress - Add an address attribute to a die based on the location
   /// provided.
-  void AddAddress(DIE *Die, unsigned Attribute,
+  void addAddress(DIE *Die, unsigned Attribute,
                   const MachineLocation &Location);
 
-  /// AddComplexAddress - Start with the address based on the location provided,
+  /// addComplexAddress - Start with the address based on the location provided,
   /// and generate the DWARF information necessary to find the actual variable
   /// (navigating the extra location information encoded in the type) based on
   /// the starting location.  Add the DWARF information to the die.
   ///
-  void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+  void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
                          const MachineLocation &Location);
 
-  // FIXME: Should be reformulated in terms of AddComplexAddress.
-  /// AddBlockByrefAddress - Start with the address based on the location
+  // FIXME: Should be reformulated in terms of addComplexAddress.
+  /// addBlockByrefAddress - Start with the address based on the location
   /// provided, and generate the DWARF information necessary to find the
   /// actual Block variable (navigating the Block struct) based on the
   /// starting location.  Add the DWARF information to the die.  Obsolete,
-  /// please use AddComplexAddress instead.
+  /// please use addComplexAddress instead.
   ///
-  void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+  void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
                             const MachineLocation &Location);
 
-  /// AddType - Add a new type attribute to the specified entity.
-  void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+  /// addType - Add a new type attribute to the specified entity.
+  void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+
+  void addPubTypes(DISubprogram SP);
 
-  /// ConstructTypeDIE - Construct basic type die from DIBasicType.
-  void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+  /// constructTypeDIE - Construct basic type die from DIBasicType.
+  void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                         DIBasicType BTy);
 
-  /// ConstructTypeDIE - Construct derived type die from DIDerivedType.
-  void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+  /// constructTypeDIE - Construct derived type die from DIDerivedType.
+  void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                         DIDerivedType DTy);
 
-  /// ConstructTypeDIE - Construct type DIE from DICompositeType.
-  void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+  /// constructTypeDIE - Construct type DIE from DICompositeType.
+  void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
                         DICompositeType CTy);
 
-  /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange.
-  void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
 
-  /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType.
-  void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, 
+  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, 
                              DICompositeType *CTy);
 
-  /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
 
-  /// CreateGlobalVariableDIE - Create new DIE using GV.
-  DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit,
+  /// createGlobalVariableDIE - Create new DIE using GV.
+  DIE *createGlobalVariableDIE(CompileUnit *DW_Unit,
                                const DIGlobalVariable &GV);
 
-  /// CreateMemberDIE - Create new member DIE.
-  DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+  /// createMemberDIE - Create new member DIE.
+  DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
 
-  /// CreateSubprogramDIE - Create new DIE using SP.
-  DIE *CreateSubprogramDIE(CompileUnit *DW_Unit,
+  /// createSubprogramDIE - Create new DIE using SP.
+  DIE *createSubprogramDIE(CompileUnit *DW_Unit,
                            const DISubprogram &SP,
                            bool IsConstructor = false,
                            bool IsInlined = false);
 
-  /// FindCompileUnit - Get the compile unit for the given descriptor. 
+  /// findCompileUnit - Get the compile unit for the given descriptor. 
   ///
-  CompileUnit &FindCompileUnit(DICompileUnit Unit) const;
+  CompileUnit &findCompileUnit(DICompileUnit Unit) const;
 
-  /// CreateDbgScopeVariable - Create a new scope variable.
+  /// createDbgScopeVariable - Create a new scope variable.
   ///
-  DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+  DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
 
   /// getUpdatedDbgScope - Find or create DbgScope assicated with 
   /// the instruction. Initialize scope and update scope hierarchy.
@@ -374,88 +368,101 @@ class DwarfDebug : public Dwarf {
   DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, 
                                     DILocation &Loc);
 
-  DIE *UpdateSubprogramScopeDIE(MDNode *SPNode);
-  DIE *ConstructLexicalScopeDIE(DbgScope *Scope);
-  DIE *ConstructScopeDIE(DbgScope *Scope);
-  DIE *ConstructInlinedScopeDIE(DbgScope *Scope);
-  DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
+  /// updateSubprogramScopeDIE - Find DIE for the given subprogram and 
+  /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+  /// If there are global variables in this scope then create and insert
+  /// DIEs for these variables.
+  DIE *updateSubprogramScopeDIE(MDNode *SPNode);
 
-  /// ConstructDbgScope - Construct the components of a scope.
-  ///
-  void ConstructDbgScope(DbgScope *ParentScope,
-                         unsigned ParentStartID, unsigned ParentEndID,
-                         DIE *ParentDie, CompileUnit *Unit);
+  /// constructLexicalScopeDIE - Construct new DW_TAG_lexical_block
+  /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+  DIE *constructLexicalScopeDIE(DbgScope *Scope);
 
-  /// EmitInitial - Emit initial Dwarf declarations.  This is necessary for cc
+  /// constructInlinedScopeDIE - This scope represents inlined body of
+  /// a function. Construct DIE to represent this concrete inlined copy
+  /// of the function.
+  DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+  DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
+
+  /// constructScopeDIE - Construct a DIE for this scope.
+  DIE *constructScopeDIE(DbgScope *Scope);
+
+  /// emitInitial - Emit initial Dwarf declarations.  This is necessary for cc
   /// tools to recognize the object file contains Dwarf information.
-  void EmitInitial();
+  void emitInitial();
 
-  /// EmitDIE - Recusively Emits a debug information entry.
+  /// emitDIE - Recursively emits a debug information entry.
   ///
-  void EmitDIE(DIE *Die);
+  void emitDIE(DIE *Die);
 
-  /// SizeAndOffsetDie - Compute the size and offset of a DIE.
+  /// computeSizeAndOffset - Compute the size and offset of a DIE.
   ///
-  unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last);
+  unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
 
-  /// SizeAndOffsets - Compute the size and offset of all the DIEs.
+  /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
   ///
-  void SizeAndOffsets();
+  void computeSizeAndOffsets();
 
-  /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section.
+  /// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
   ///
-  void EmitDebugInfoPerCU(CompileUnit *Unit);
+  void emitDebugInfoPerCU(CompileUnit *Unit);
 
-  void EmitDebugInfo();
+  void emitDebugInfo();
 
-  /// EmitAbbreviations - Emit the abbreviation section.
+  /// emitAbbreviations - Emit the abbreviation section.
   ///
-  void EmitAbbreviations() const;
+  void emitAbbreviations() const;
 
-  /// EmitEndOfLineMatrix - Emit the last address of the section and the end of
+  /// emitEndOfLineMatrix - Emit the last address of the section and the end of
   /// the line matrix.
   ///
-  void EmitEndOfLineMatrix(unsigned SectionEnd);
+  void emitEndOfLineMatrix(unsigned SectionEnd);
 
-  /// EmitDebugLines - Emit source line information.
+  /// emitDebugLines - Emit source line information.
   ///
-  void EmitDebugLines();
+  void emitDebugLines();
 
-  /// EmitCommonDebugFrame - Emit common frame info into a debug frame section.
+  /// emitCommonDebugFrame - Emit common frame info into a debug frame section.
   ///
-  void EmitCommonDebugFrame();
+  void emitCommonDebugFrame();
 
-  /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame
+  /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
   /// section.
-  void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+  void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
 
-  void EmitDebugPubNamesPerCU(CompileUnit *Unit);
+  void emitDebugPubNamesPerCU(CompileUnit *Unit);
 
-  /// EmitDebugPubNames - Emit visible names into a debug pubnames section.
+  /// emitDebugPubNames - Emit visible names into a debug pubnames section.
   ///
-  void EmitDebugPubNames();
+  void emitDebugPubNames();
 
-  /// EmitDebugStr - Emit visible names into a debug str section.
+  /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
   ///
-  void EmitDebugStr();
+  void emitDebugPubTypes();
 
-  /// EmitDebugLoc - Emit visible names into a debug loc section.
+  /// emitDebugStr - Emit visible names into a debug str section.
   ///
-  void EmitDebugLoc();
+  void emitDebugStr();
+
+  /// emitDebugLoc - Emit visible names into a debug loc section.
+  ///
+  void emitDebugLoc();
 
   /// EmitDebugARanges - Emit visible names into a debug aranges section.
   ///
   void EmitDebugARanges();
 
-  /// EmitDebugRanges - Emit visible names into a debug ranges section.
+  /// emitDebugRanges - Emit visible names into a debug ranges section.
   ///
-  void EmitDebugRanges();
+  void emitDebugRanges();
 
-  /// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
+  /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
   ///
-  void EmitDebugMacInfo();
+  void emitDebugMacInfo();
 
-  /// EmitDebugInlineInfo - Emit inline info using following format.
+  /// emitDebugInlineInfo - Emit inline info using following format.
   /// Section Header:
   /// 1. length of section
   /// 2. Dwarf version number
@@ -473,26 +480,25 @@ class DwarfDebug : public Dwarf {
   /// inlined instance; the die_offset points to the inlined_subroutine die in
   /// the __debug_info section, and the low_pc is the starting address  for the
   ///  inlining instance.
-  void EmitDebugInlineInfo();
+  void emitDebugInlineInfo();
 
   /// GetOrCreateSourceID - Look up the source id with the given directory and
   /// source file names. If none currently exists, create a new id and insert it
   /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps
   /// as well.
-  unsigned GetOrCreateSourceID(const char *DirName,
-                               const char *FileName);
+  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
 
-  void ConstructCompileUnit(MDNode *N);
+  void constructCompileUnit(MDNode *N);
 
-  void ConstructGlobalVariableDIE(MDNode *N);
+  void constructGlobalVariableDIE(MDNode *N);
 
-  void ConstructSubprogram(MDNode *N);
+  void constructSubprogramDIE(MDNode *N);
 
   // FIXME: This should go away in favor of complex addresses.
   /// Find the type the programmer originally declared the variable to be
   /// and return that type.  Obsolete, use GetComplexAddrType instead.
   ///
-  DIType GetBlockByrefType(DIType Ty, std::string Name);
+  DIType getBlockByrefType(DIType Ty, std::string Name);
 
 public:
   //===--------------------------------------------------------------------===//
@@ -505,30 +511,30 @@ public:
   /// be emitted.
   bool ShouldEmitDwarfDebug() const { return shouldEmit; }
 
-  /// BeginModule - Emit all Dwarf sections that should come prior to the
+  /// beginModule - Emit all Dwarf sections that should come prior to the
   /// content.
-  void BeginModule(Module *M, MachineModuleInfo *MMI);
+  void beginModule(Module *M, MachineModuleInfo *MMI);
 
-  /// EndModule - Emit all Dwarf sections that should come after the content.
+  /// endModule - Emit all Dwarf sections that should come after the content.
   ///
-  void EndModule();
+  void endModule();
 
-  /// BeginFunction - Gather pre-function debug information.  Assumes being
+  /// beginFunction - Gather pre-function debug information.  Assumes being
   /// emitted immediately after the function entry point.
-  void BeginFunction(MachineFunction *MF);
+  void beginFunction(MachineFunction *MF);
 
-  /// EndFunction - Gather and emit post-function debug information.
+  /// endFunction - Gather and emit post-function debug information.
   ///
-  void EndFunction(MachineFunction *MF);
+  void endFunction(MachineFunction *MF);
 
-  /// RecordSourceLine - Records location information and associates it with a 
+  /// recordSourceLine - Records location information and associates it with a 
   /// label. Returns a unique label ID used to generate a label and provide
   /// correspondence to the source line list.
-  unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
+  unsigned recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
 
-  /// getRecordSourceLineCount - Return the number of source lines in the debug
+  /// getSourceLineCount - Return the number of source lines in the debug
   /// info.
-  unsigned getRecordSourceLineCount() const {
+  unsigned getSourceLineCount() const {
     return Lines.size();
   }
                             
@@ -540,22 +546,18 @@ public:
   unsigned getOrCreateSourceID(const std::string &DirName,
                                const std::string &FileName);
 
-  /// ExtractScopeInformation - Scan machine instructions in this function
+  /// extractScopeInformation - Scan machine instructions in this function
   /// and collect DbgScopes. Return true, if atleast one scope was found.
-  bool ExtractScopeInformation(MachineFunction *MF);
-
-  /// CollectVariableInfo - Populate DbgScope entries with variables' info.
-  void CollectVariableInfo();
+  bool extractScopeInformation(MachineFunction *MF);
 
-  /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
-  /// end with this machine instruction.
-  void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label);
+  /// collectVariableInfo - Populate DbgScope entries with variables' info.
+  void collectVariableInfo();
 
-  /// BeginScope - Process beginning of a scope starting at Label.
-  void BeginScope(const MachineInstr *MI, unsigned Label);
+  /// beginScope - Process beginning of a scope starting at Label.
+  void beginScope(const MachineInstr *MI, unsigned Label);
 
-  /// EndScope - Prcess end of a scope.
-  void EndScope(const MachineInstr *MI);
+  /// endScope - Process end of a scope.
+  void endScope(const MachineInstr *MI);
 };
 } // End of namespace llvm
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index fcdcfd31bc3ee..1c8b8f4647209 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -727,8 +727,7 @@ void DwarfException::EmitExceptionTable() {
     // somewhere.  This predicate should be moved to a shared location that is
     // in target-independent code.
     //
-    if ((LSDASection->getKind().isWriteable() &&
-         !LSDASection->getKind().isReadOnlyWithRel()) ||
+    if (LSDASection->getKind().isWriteable() ||
         Asm->TM.getRelocationModel() == Reloc::Static)
       TTypeFormat = dwarf::DW_EH_PE_absptr;
     else
@@ -918,36 +917,14 @@ void DwarfException::EmitExceptionTable() {
   }
 
   // Emit the Catch TypeInfos.
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-  unsigned Index = 1;
-
   for (std::vector<GlobalVariable *>::const_reverse_iterator
          I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
-    const GlobalVariable *TI = *I;
-
-    if (TI) {
-      if (!LSDASection->getKind().isReadOnlyWithRel() &&
-          (TTypeFormat == dwarf::DW_EH_PE_absptr ||
-           TI->getLinkage() == GlobalValue::InternalLinkage)) {
-        // Print out the unadorned name of the type info.
-        PrintRelDirective();
-        O << Asm->Mang->getMangledName(TI);
-      } else {
-        bool IsTypeInfoIndirect = false, IsTypeInfoPCRel = false;
-        const MCExpr *TypeInfoRef =
-          TLOF.getSymbolForDwarfGlobalReference(TI, Asm->Mang, Asm->MMI,
-                                                IsTypeInfoIndirect,
-                                                IsTypeInfoPCRel);
-
-        if (!IsTypeInfoPCRel)
-          TypeInfoRef = CreateLabelDiff(TypeInfoRef, "typeinforef_addr",
-                                        Index++);
-
-        O << MAI->getData32bitsDirective();
-        TypeInfoRef->print(O, MAI);
-      }
+    const GlobalVariable *GV = *I;
+    PrintRelDirective();
+
+    if (GV) {
+      O << Asm->Mang->getMangledName(GV);
     } else {
-      PrintRelDirective();
       O << "0x0";
     }
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 63ae653680584..dd8d88a2e4af0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -43,14 +43,14 @@ void DwarfWriter::BeginModule(Module *M,
   DE = new DwarfException(OS, A, T);
   DD = new DwarfDebug(OS, A, T);
   DE->BeginModule(M, MMI);
-  DD->BeginModule(M, MMI);
+  DD->beginModule(M, MMI);
 }
 
 /// EndModule - Emit all Dwarf sections that should come after the content.
 ///
 void DwarfWriter::EndModule() {
   DE->EndModule();
-  DD->EndModule();
+  DD->endModule();
   delete DD; DD = 0;
   delete DE; DE = 0;
 }
@@ -59,13 +59,13 @@ void DwarfWriter::EndModule() {
 /// emitted immediately after the function entry point.
 void DwarfWriter::BeginFunction(MachineFunction *MF) {
   DE->BeginFunction(MF);
-  DD->BeginFunction(MF);
+  DD->beginFunction(MF);
 }
 
 /// EndFunction - Gather and emit post-function debug information.
 ///
 void DwarfWriter::EndFunction(MachineFunction *MF) {
-  DD->EndFunction(MF);
+  DD->endFunction(MF);
   DE->EndFunction();
 
   if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI())
@@ -78,12 +78,12 @@ void DwarfWriter::EndFunction(MachineFunction *MF) {
 /// correspondence to the source line list.
 unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, 
                                        MDNode *Scope) {
-  return DD->RecordSourceLine(Line, Col, Scope);
+  return DD->recordSourceLine(Line, Col, Scope);
 }
 
 /// getRecordSourceLineCount - Count source lines.
 unsigned DwarfWriter::getRecordSourceLineCount() {
-  return DD->getRecordSourceLineCount();
+  return DD->getSourceLineCount();
 }
 
 /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
@@ -93,8 +93,8 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const {
 }
 
 void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
-  DD->BeginScope(MI, L);
+  DD->beginScope(MI, L);
 }
 void DwarfWriter::EndScope(const MachineInstr *MI) {
-  DD->EndScope(MI);
+  DD->endScope(MI);
 }
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index f807e8fa261ea..8a62eb20bbb44 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -41,8 +41,6 @@ using namespace llvm;
 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
 STATISTIC(NumBranchOpts, "Number of branches optimized");
 STATISTIC(NumTailMerge , "Number of block tails merged");
-STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
-STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
 
 static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
                               cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -205,16 +203,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
     MadeChange |= MadeChangeThisIteration;
   }
 
-  // Do tail duplication after tail merging is done.  Otherwise it is
-  // tough to avoid situations where tail duplication and tail merging undo
-  // each other's transformations ad infinitum.
-  MadeChangeThisIteration = true;
-  while (MadeChangeThisIteration) {
-    MadeChangeThisIteration = false;
-    MadeChangeThisIteration |= TailDuplicateBlocks(MF);
-    MadeChange |= MadeChangeThisIteration;
-  }
-
   // See if any jump tables have become mergable or dead as the code generator
   // did its thing.
   MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
@@ -918,71 +906,6 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
 }
 
 
-/// CanFallThrough - Return true if the specified block (with the specified
-/// branch condition) can implicitly transfer control to the block after it by
-/// falling off the end of it.  This should return false if it can reach the
-/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
-///
-/// True is a conservative answer.
-///
-bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
-                                  bool BranchUnAnalyzable,
-                                  MachineBasicBlock *TBB,
-                                  MachineBasicBlock *FBB,
-                                  const SmallVectorImpl<MachineOperand> &Cond) {
-  MachineFunction::iterator Fallthrough = CurBB;
-  ++Fallthrough;
-  // If FallthroughBlock is off the end of the function, it can't fall through.
-  if (Fallthrough == CurBB->getParent()->end())
-    return false;
-
-  // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
-  if (!CurBB->isSuccessor(Fallthrough))
-    return false;
-
-  // If we couldn't analyze the branch, examine the last instruction.
-  // If the block doesn't end in a known control barrier, assume fallthrough
-  // is possible. The isPredicable check is needed because this code can be
-  // called during IfConversion, where an instruction which is normally a
-  // Barrier is predicated and thus no longer an actual control barrier. This
-  // is over-conservative though, because if an instruction isn't actually
-  // predicated we could still treat it like a barrier.
-  if (BranchUnAnalyzable)
-    return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
-           CurBB->back().getDesc().isPredicable();
-
-  // If there is no branch, control always falls through.
-  if (TBB == 0) return true;
-
-  // If there is some explicit branch to the fallthrough block, it can obviously
-  // reach, even though the branch should get folded to fall through implicitly.
-  if (MachineFunction::iterator(TBB) == Fallthrough ||
-      MachineFunction::iterator(FBB) == Fallthrough)
-    return true;
-
-  // If it's an unconditional branch to some block not the fall through, it
-  // doesn't fall through.
-  if (Cond.empty()) return false;
-
-  // Otherwise, if it is conditional and has no explicit false block, it falls
-  // through.
-  return FBB == 0;
-}
-
-/// CanFallThrough - Return true if the specified can implicitly transfer
-/// control to the block after it by falling off the end of it.  This should
-/// return false if it can reach the block after it, but it uses an explicit
-/// branch to do so (e.g. a table jump).
-///
-/// True is a conservative answer.
-///
-bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
-  MachineBasicBlock *TBB = 0, *FBB = 0;
-  SmallVector<MachineOperand, 4> Cond;
-  bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
-  return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
-}
-
 /// IsBetterFallthrough - Return true if it would be clearly better to
 /// fall-through to MBB1 than to fall through into MBB2.  This has to return
 /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
@@ -1005,143 +928,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
   return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
 }
 
-/// TailDuplicateBlocks - Look for small blocks that are unconditionally
-/// branched to and do not fall through. Tail-duplicate their instructions
-/// into their predecessors to eliminate (dynamic) branches.
-bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
-  bool MadeChange = false;
-
-  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
-    MachineBasicBlock *MBB = I++;
-
-    // Only duplicate blocks that end with unconditional branches.
-    if (CanFallThrough(MBB))
-      continue;
-
-    MadeChange |= TailDuplicate(MBB, MF);
-
-    // If it is dead, remove it.
-    if (MBB->pred_empty()) {
-      NumInstrDups -= MBB->size();
-      RemoveDeadBlock(MBB);
-      MadeChange = true;
-      ++NumDeadBlocks;
-    }
-  }
-  return MadeChange;
-}
-
-/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
-/// of its predecessors.
-bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
-                                 MachineFunction &MF) {
-  // Don't try to tail-duplicate single-block loops.
-  if (TailBB->isSuccessor(TailBB))
-    return false;
-
-  // Set the limit on the number of instructions to duplicate, with a default
-  // of one less than the tail-merge threshold. When optimizing for size,
-  // duplicate only one, because one branch instruction can be eliminated to
-  // compensate for the duplication.
-  unsigned MaxDuplicateCount =
-    MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ?
-    1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1);
-
-  // Check the instructions in the block to determine whether tail-duplication
-  // is invalid or unlikely to be profitable.
-  unsigned i = 0;
-  bool HasCall = false;
-  for (MachineBasicBlock::iterator I = TailBB->begin();
-       I != TailBB->end(); ++I, ++i) {
-    // Non-duplicable things shouldn't be tail-duplicated.
-    if (I->getDesc().isNotDuplicable()) return false;
-    // Don't duplicate more than the threshold.
-    if (i == MaxDuplicateCount) return false;
-    // Remember if we saw a call.
-    if (I->getDesc().isCall()) HasCall = true;
-  }
-  // Heuristically, don't tail-duplicate calls if it would expand code size,
-  // as it's less likely to be worth the extra cost.
-  if (i > 1 && HasCall)
-    return false;
-
-  // Iterate through all the unique predecessors and tail-duplicate this
-  // block into them, if possible. Copying the list ahead of time also
-  // avoids trouble with the predecessor list reallocating.
-  bool Changed = false;
-  SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
-                                               TailBB->pred_end());
-  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
-       PE = Preds.end(); PI != PE; ++PI) {
-    MachineBasicBlock *PredBB = *PI;
-
-    assert(TailBB != PredBB &&
-           "Single-block loop should have been rejected earlier!");
-    if (PredBB->succ_size() > 1) continue;
-
-    MachineBasicBlock *PredTBB, *PredFBB;
-    SmallVector<MachineOperand, 4> PredCond;
-    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
-      continue;
-    if (!PredCond.empty())
-      continue;
-    // EH edges are ignored by AnalyzeBranch.
-    if (PredBB->succ_size() != 1)
-      continue;
-    // Don't duplicate into a fall-through predecessor (at least for now).
-    if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB))
-      continue;
-
-    DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
-                 << "From Succ: " << *TailBB);
-
-    // Remove PredBB's unconditional branch.
-    TII->RemoveBranch(*PredBB);
-    // Clone the contents of TailBB into PredBB.
-    for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
-         I != E; ++I) {
-      MachineInstr *NewMI = MF.CloneMachineInstr(I);
-      PredBB->insert(PredBB->end(), NewMI);
-    }
-    NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
-
-    // Update the CFG.
-    PredBB->removeSuccessor(PredBB->succ_begin());
-    assert(PredBB->succ_empty() &&
-           "TailDuplicate called on block with multiple successors!");
-    for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
-         E = TailBB->succ_end(); I != E; ++I)
-       PredBB->addSuccessor(*I);
-
-    Changed = true;
-    ++NumTailDups;
-  }
-
-  // If TailBB was duplicated into all its predecessors except for the prior
-  // block, which falls through unconditionally, move the contents of this
-  // block into the prior block.
-  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
-  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
-  SmallVector<MachineOperand, 4> PriorCond;
-  bool PriorUnAnalyzable =
-    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
-  // This has to check PrevBB->succ_size() because EH edges are ignored by
-  // AnalyzeBranch.
-  if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
-      TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
-      !TailBB->hasAddressTaken()) {
-    DEBUG(errs() << "\nMerging into block: " << PrevBB
-          << "From MBB: " << *TailBB);
-    PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
-    PrevBB.removeSuccessor(PrevBB.succ_begin());;
-    assert(PrevBB.succ_empty());
-    PrevBB.transferSuccessors(TailBB);
-    Changed = true;
-  }
-
-  return Changed;
-}
-
 /// OptimizeBlock - Analyze and optimize control flow related to the specified
 /// block.  This is never called on the entry block.
 bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
@@ -1266,7 +1052,7 @@ ReoptimizeBlock:
     // the assert condition out of the loop body.
     if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
         MachineFunction::iterator(PriorTBB) == FallThrough &&
-        !CanFallThrough(MBB)) {
+        !MBB->canFallThrough()) {
       bool DoTransform = true;
 
       // We have to be careful that the succs of PredBB aren't both no-successor
@@ -1290,7 +1076,7 @@ ReoptimizeBlock:
       // In this case, we could actually be moving the return block *into* a
       // loop!
       if (DoTransform && !MBB->succ_empty() &&
-          (!CanFallThrough(PriorTBB) || PriorTBB->empty()))
+          (!PriorTBB->canFallThrough() || PriorTBB->empty()))
         DoTransform = false;
 
 
@@ -1422,13 +1208,11 @@ ReoptimizeBlock:
   // If the prior block doesn't fall through into this block, and if this
   // block doesn't fall through into some other block, see if we can find a
   // place to move this block where a fall-through will happen.
-  if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
-                      PriorTBB, PriorFBB, PriorCond)) {
+  if (!PrevBB.canFallThrough()) {
 
     // Now we know that there was no fall-through into this block, check to
     // see if it has a fall-through into its successor.
-    bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
-                                       CurCond);
+    bool CurFallsThru = MBB->canFallThrough();
 
     if (!MBB->isLandingPad()) {
       // Check all the predecessors of this block.  If one of them has no fall
@@ -1440,7 +1224,7 @@ ReoptimizeBlock:
         MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
         MachineBasicBlock *PredTBB, *PredFBB;
         SmallVector<MachineOperand, 4> PredCond;
-        if (PredBB != MBB && !CanFallThrough(PredBB) &&
+        if (PredBB != MBB && !PredBB->canFallThrough() &&
             !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
             && (!CurFallsThru || !CurTBB || !CurFBB)
             && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
@@ -1479,7 +1263,7 @@ ReoptimizeBlock:
         // and if the successor isn't an EH destination, we can arrange for the
         // fallthrough to happen.
         if (SuccBB != MBB && &*SuccPrev != MBB &&
-            !CanFallThrough(SuccPrev) && !CurUnAnalyzable &&
+            !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
             !SuccBB->isLandingPad()) {
           MBB->moveBefore(SuccBB);
           MadeChange = true;
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4920755c227b7..b087395640608 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -105,18 +105,10 @@ namespace llvm {
     unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
                                        unsigned maxCommonTailLength);
 
-    bool TailDuplicateBlocks(MachineFunction &MF);
-    bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
-    
     bool OptimizeBranches(MachineFunction &MF);
     bool OptimizeBlock(MachineBasicBlock *MBB);
     void RemoveDeadBlock(MachineBasicBlock *MBB);
     bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
-    
-    bool CanFallThrough(MachineBasicBlock *CurBB);
-    bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
-                        MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-                        const SmallVectorImpl<MachineOperand> &Cond);
   };
 }
 
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9583edcbe44af..6f86614c90d80 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -63,6 +63,7 @@ add_llvm_library(LLVMCodeGen
   StackProtector.cpp
   StackSlotColoring.cpp
   StrongPHIElimination.cpp
+  TailDuplication.cpp
   TargetInstrInfoImpl.cpp
   TwoAddressInstructionPass.cpp
   UnreachableBlockElim.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 984e0135b8c64..1b39fec395fae 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -316,7 +316,6 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
 
 unsigned CriticalAntiDepBreaker::
 BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                      CandidateMap& Candidates,
                       MachineBasicBlock::iterator& Begin,
                       MachineBasicBlock::iterator& End,
                       unsigned InsertPosIndex) {
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 5664d852fdfe0..496888d45f9d0 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -25,6 +25,7 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallSet.h"
+#include <map>
 
 namespace llvm {
   class CriticalAntiDepBreaker : public AntiDepBreaker {
@@ -64,13 +65,6 @@ namespace llvm {
     CriticalAntiDepBreaker(MachineFunction& MFi);
     ~CriticalAntiDepBreaker();
     
-    /// GetMaxTrials - Critical path anti-dependence breaking requires
-    /// only a single pass
-    unsigned GetMaxTrials() { return 1; }
-
-    /// NeedCandidates - Candidates not needed.
-    bool NeedCandidates() { return false; }
-
     /// Start - Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB);
 
@@ -78,7 +72,6 @@ namespace llvm {
     /// of the ScheduleDAG and break them by renaming registers.
     ///
     unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
-                                   CandidateMap& Candidates,
                                    MachineBasicBlock::iterator& Begin,
                                    MachineBasicBlock::iterator& End,
                                    unsigned InsertPosIndex);
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 7fc62a9a96c42..9b516ed75a888 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -332,7 +332,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() {
   if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
     // Turn the exception temporary into registers and phi nodes if possible.
     std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
-    PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext());
+    PromoteMemToReg(Allocas, *DT, *DF);
     return true;
   }
   return false;
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 45f08b168a496..c23d7070a34e3 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -608,7 +608,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
     if (TII->DefinesPredicate(I, PredDefs))
       BBI.ClobbersPred = true;
 
-    if (!TID.isPredicable()) {
+    if (!TII->isPredicable(I)) {
       BBI.IsUnpredicable = true;
       return;
     }
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 0db459bb9163b..242cba5b64e39 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -35,6 +35,8 @@ static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
     cl::desc("Disable Post Regalloc"));
 static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
     cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+    cl::desc("Disable tail duplication"));
 static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
     cl::desc("Disable code placement"));
 static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
@@ -66,6 +68,11 @@ static cl::opt<cl::boolOrDefault>
 EnableFastISelOption("fast-isel", cl::Hidden,
   cl::desc("Enable the \"fast\" instruction selector"));
 
+// Enable or disable an experimental optimization to split GEPs
+// and run a special GVN pass which does not examine loads, in
+// an effort to factor out redundancy implicit in complex GEPs.
+static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
+    cl::desc("Split GEPs and run no-load GVN"));
 
 LLVMTargetMachine::LLVMTargetMachine(const Target &T,
                                      const std::string &TargetTriple)
@@ -223,6 +230,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                                                CodeGenOpt::Level OptLevel) {
   // Standard LLVM-Level Passes.
 
+  // Optionally, run split-GEPs and no-load GVN.
+  if (EnableSplitGEPGVN) {
+    PM.add(createGEPSplitterPass());
+    PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
+  }
+
   // Run loop strength reduction before anything else.
   if (OptLevel != CodeGenOpt::None && !DisableLSR) {
     PM.add(createLoopStrengthReducePass(getTargetLowering()));
@@ -333,15 +346,17 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     printAndVerify(PM, "After BranchFolding");
   }
 
+  // Tail duplication.
+  if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
+    PM.add(createTailDuplicatePass());
+    printAndVerify(PM, "After TailDuplicate");
+  }
+
   PM.add(createGCMachineCodeAnalysisPass());
 
   if (PrintGCInfo)
     PM.add(createGCInfoPrinter(errs()));
 
-  // Fold redundant debug labels.
-  PM.add(createDebugLabelFoldingPass());
-  printAndVerify(PM, "After DebugLabelFolding");
-
   if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
     PM.add(createCodePlacementOptPass());
     printAndVerify(PM, "After CodePlacementOpt");
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 23dce4a91a136..f1bd5735439dc 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,10 +55,6 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
   SUnit *OnlyAvailablePred = 0;
   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
-    if (IgnoreAntiDep && 
-        ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) 
-      continue;
-
     SUnit &Pred = *I->getSUnit();
     if (!Pred.isScheduled) {
       // We found an available, but not scheduled, predecessor.  If it's the
@@ -78,10 +74,6 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
   unsigned NumNodesBlocking = 0;
   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
-    if (IgnoreAntiDep && 
-        ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) 
-      continue;
-
     if (getSingleUnscheduledPred(I->getSUnit()) == SU)
       ++NumNodesBlocking;
   }
@@ -98,10 +90,6 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
 void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
-    if (IgnoreAntiDep && 
-        ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) 
-      continue;
-
     AdjustPriorityOfUnscheduledPreds(I->getSUnit());
   }
 }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index bbfc82b5d96d2..24adf364e710c 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -136,7 +136,8 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
 
   for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
        mbbi != mbbe; ++mbbi) {
-    OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n";
+    OS << "BB#" << mbbi->getNumber()
+       << ":\t\t# derived from " << mbbi->getName() << "\n";
     for (MachineBasicBlock::iterator mii = mbbi->begin(),
            mie = mbbi->end(); mii != mie; ++mii) {
       OS << getInstructionIndex(mii) << '\t' << *mii;
@@ -658,7 +659,7 @@ void LiveIntervals::computeIntervals() {
     MachineBasicBlock *MBB = MBBI;
     // Track the index of the current machine instr.
     SlotIndex MIIndex = getMBBStartIdx(MBB);
-    DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
+    DEBUG(errs() << MBB->getName() << ":\n");
 
     MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
 
@@ -1094,6 +1095,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       NewVReg = mri_->createVirtualRegister(rc);
       vrm.grow();
       CreatedNewVReg = true;
+
+      // The new virtual register should get the same allocation hints as the
+      // old one.
+      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
+      if (Hint.first || Hint.second)
+        mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
     }
 
     if (!TryFold)
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 16a79bb54e976..68f80acf1562f 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -279,6 +279,43 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
     PhysRegUse[SubReg] =  MI;
 }
 
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register, or null if it has no recorded def or use.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  MachineInstr *LastUse = PhysRegUse[Reg];
+  if (!LastDef && !LastUse)
+    return 0;
+
+  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  MachineInstr *LastPartDef = 0;
+  unsigned LastPartDefDist = 0;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist) {
+        LastPartDefDist = Dist;
+        LastPartDef = Def;
+      }
+      continue;
+    }
+    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  return LastRefOrPartRef;
+}
+
 bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
   MachineInstr *LastDef = PhysRegDef[Reg];
   MachineInstr *LastUse = PhysRegUse[Reg];
@@ -373,7 +410,16 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
       if (NeedDef)
         PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
                                                  true/*IsDef*/, true/*IsImp*/));
-      LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+      if (LastSubRef)
+        LastSubRef->addRegisterKilled(SubReg, TRI, true);
+      else {
+        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+        PhysRegUse[SubReg] = LastRefOrPartRef;
+        for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+             unsigned SSReg = *SSRegs; ++SSRegs)
+          PhysRegUse[SSReg] = LastRefOrPartRef;
+      }
       for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
         PartUses.erase(*SS);
     }
@@ -656,35 +702,45 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
           .push_back(BBI->getOperand(i).getReg());
 }
 
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+                                      unsigned Reg,
+                                      MachineRegisterInfo &MRI) {
+  unsigned Num = MBB.getNumber();  // Bit index of MBB in AliveBlocks.
+
+  // Reg is live-through.
+  if (AliveBlocks.test(Num))
+    return true;
+
+  // Registers defined in MBB cannot be live in.
+  const MachineInstr *Def = MRI.getVRegDef(Reg);
+  if (Def && Def->getParent() == &MBB)
+    return false;
+
+  // Reg is not defined in MBB; it is live in only if MBB kills it.
+  return findKill(&MBB);
+}
+
 /// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All
 /// variables that are live out of DomBB will be marked as passing live through
 /// BB.
 void LiveVariables::addNewBlock(MachineBasicBlock *BB,
-                                MachineBasicBlock *DomBB) {
+                                MachineBasicBlock *DomBB,
+                                MachineBasicBlock *SuccBB) {
   const unsigned NumNew = BB->getNumber();
-  const unsigned NumDom = DomBB->getNumber();
+
+  // All registers used by PHI nodes in SuccBB must be live through BB.
+  for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+         BBE = SuccBB->end();
+       BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI)
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+      if (BBI->getOperand(i+1).getMBB() == BB)
+        getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
 
   // Update info for all live variables
   for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
          E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
     VarInfo &VI = getVarInfo(Reg);
-
-    // Anything live through DomBB is also live through BB.
-    if (VI.AliveBlocks.test(NumDom)) {
+    if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
       VI.AliveBlocks.set(NumNew);
-      continue;
-    }
-
-    // Variables not defined in DomBB cannot be live out.
-    const MachineInstr *Def = MRI->getVRegDef(Reg);
-    if (!Def || Def->getParent() != DomBB)
-      continue;
-
-    // Killed by DomBB?
-    if (VI.findKill(DomBB))
-      continue;
-
-    // This register is defined in DomBB and live out
-    VI.AliveBlocks.set(NumNew);
   }
 }
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index cd52825d21f12..e55e3694bcc47 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -172,6 +172,13 @@ static inline void OutputReg(raw_ostream &os, unsigned RegNo,
     os << " %reg" << RegNo;
 }
 
+StringRef MachineBasicBlock::getName() const {
+  if (const BasicBlock *LBB = getBasicBlock())  // Use the BasicBlock's name.
+    return LBB->getName();
+  else
+    return "(null)";  // No BasicBlock attached; return a placeholder.
+}
+
 void MachineBasicBlock::print(raw_ostream &OS) const {
   const MachineFunction *MF = getParent();
   if (!MF) {
@@ -272,8 +279,9 @@ void MachineBasicBlock::updateTerminator() {
       // successors is its layout successor, rewrite it to a fallthrough
       // conditional branch.
       if (isLayoutSuccessor(TBB)) {
+        if (TII->ReverseBranchCondition(Cond))
+          return;
         TII->RemoveBranch(*this);
-        TII->ReverseBranchCondition(Cond);
         TII->InsertBranch(*this, FBB, 0, Cond);
       } else if (isLayoutSuccessor(FBB)) {
         TII->RemoveBranch(*this);
@@ -285,8 +293,13 @@ void MachineBasicBlock::updateTerminator() {
       MachineBasicBlock *MBBB = *next(succ_begin());
       if (MBBA == TBB) std::swap(MBBB, MBBA);
       if (isLayoutSuccessor(TBB)) {
+        if (TII->ReverseBranchCondition(Cond)) {
+          // We can't reverse the condition, add an unconditional branch.
+          Cond.clear();
+          TII->InsertBranch(*this, MBBA, 0, Cond);
+          return;
+        }
         TII->RemoveBranch(*this);
-        TII->ReverseBranchCondition(Cond);
         TII->InsertBranch(*this, MBBA, 0, Cond);
       } else if (!isLayoutSuccessor(MBBA)) {
         TII->RemoveBranch(*this);
@@ -349,6 +362,51 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
   return next(I) == MachineFunction::const_iterator(MBB);
 }
 
+bool MachineBasicBlock::canFallThrough() {
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+  bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true);
+
+  MachineFunction::iterator Fallthrough = this;
+  ++Fallthrough;  // Now refers to the block that follows this one.
+  // If this is the last block of the function, there is nothing to fall into.
+  if (Fallthrough == getParent()->end())
+    return false;
+
+  // If the following block isn't a successor, no fallthrough is possible.
+  if (!isSuccessor(Fallthrough))
+    return false;
+
+  // If we couldn't analyze the branch, examine the last instruction.
+  // If the block doesn't end in a known control barrier, assume fallthrough
+  // is possible. The isPredicable check is needed because this code can be
+  // called during IfConversion, where an instruction which is normally a
+  // Barrier is predicated and thus no longer an actual control barrier. This
+  // is over-conservative though, because if an instruction isn't actually
+  // predicated we could still treat it like a barrier.
+  if (BranchUnAnalyzable)
+    return empty() || !back().getDesc().isBarrier() ||
+           back().getDesc().isPredicable();
+
+  // If there is no branch, control always falls through.
+  if (TBB == 0) return true;
+
+  // If there is some explicit branch to the fallthrough block, it can obviously
+  // reach, even though the branch should get folded to fall through implicitly.
+  if (MachineFunction::iterator(TBB) == Fallthrough ||
+      MachineFunction::iterator(FBB) == Fallthrough)
+    return true;
+
+  // If it's an unconditional branch to some block not the fall through, it
+  // doesn't fall through.
+  if (Cond.empty()) return false;
+
+  // Otherwise, if it is conditional and has no explicit false block, it falls
+  // through.
+  return FBB == 0;
+}
+
 /// removeFromParent - This method unlinks 'this' from the containing function,
 /// and returns it, but does not delete it.
 MachineBasicBlock *MachineBasicBlock::removeFromParent() {
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 81d1301336b8b..d20f4464e502d 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -359,14 +359,16 @@ void MachineFunction::print(raw_ostream &OS) const {
 namespace llvm {
   template<>
   struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
     static std::string getGraphName(const MachineFunction *F) {
       return "CFG for '" + F->getFunction()->getNameStr() + "' function";
     }
 
-    static std::string getNodeLabel(const MachineBasicBlock *Node,
-                                    const MachineFunction *Graph,
-                                    bool ShortNames) {
-      if (ShortNames && Node->getBasicBlock() &&
+    std::string getNodeLabel(const MachineBasicBlock *Node,
+                             const MachineFunction *Graph) {
+      if (isSimple () && Node->getBasicBlock() &&
           !Node->getBasicBlock()->getName().empty())
         return Node->getBasicBlock()->getNameStr() + ":";
 
@@ -374,7 +376,7 @@ namespace llvm {
       {
         raw_string_ostream OSS(OutStr);
         
-        if (ShortNames)
+        if (isSimple())
           OSS << Node->getNumber() << ':';
         else
           Node->print(OSS);
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b250faa62ae61..f73a5a362112f 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1148,10 +1148,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     // TODO: print InlinedAtLoc information
 
     DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
-    DICompileUnit CU(DLT.Scope);
-    if (!CU.isNull())
-      OS << " dbg:" << CU.getDirectory() << '/' << CU.getFilename() << ":"
-         << DLT.Line << ":" << DLT.Col;
+    DIScope Scope(DLT.Scope);
+    OS << " dbg:";
+    if (!Scope.isNull())
+      OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':';
+    OS << DLT.Line << ":" << DLT.Col;
   }
 
   OS << "\n";
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 33b6b823446e6..66de5359df994 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -107,6 +107,10 @@ namespace {
     ///
     void HoistRegion(MachineDomTreeNode *N);
 
+    /// isLoadFromConstantMemory - Return true if the given instruction is a
+    /// load from constant memory.
+    bool isLoadFromConstantMemory(MachineInstr *MI);
+
     /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
     /// the load itself could be hoisted. Return the unfolded and hoistable
     /// load, or null if the load couldn't be unfolded or if it wouldn't
@@ -338,6 +342,24 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
   return false;
 }
 
+/// isLoadFromConstantMemory - Return true if the given instruction is a
+/// load from constant memory. Machine LICM will hoist these even if they are
+/// not re-materializable.
+bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
+  if (!MI->getDesc().mayLoad()) return false;  // Not a load.
+  if (!MI->hasOneMemOperand()) return false;   // Need exactly one memoperand.
+  MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (MMO->isVolatile()) return false;         // Volatile is never constant.
+  if (!MMO->getValue()) return false;          // No underlying value to query.
+  const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
+  if (PSV) {  // Pseudo source values are asked directly.
+    MachineFunction &MF = *MI->getParent()->getParent();
+    return PSV->isConstant(MF.getFrameInfo());
+  } else {  // Any other value is checked through alias analysis.
+    return AA->pointsToConstantMemory(MMO->getValue());
+  }
+}
+
 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
 /// the given loop invariant.
 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
@@ -347,8 +369,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
   // FIXME: For now, only hoist re-materilizable instructions. LICM will
   // increase register pressure. We want to make sure it doesn't increase
   // spilling.
-  if (!TII->isTriviallyReMaterializable(&MI, AA))
-    return false;
+  // Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
+  // Hoisting these tends to help performance in low register pressure
+  // situations. The trade-off is that it may cause a spill in high pressure
+  // situations, which adds a store in the loop preheader; but the reload is
+  // no more expensive. The side benefit is these loads are frequently CSE'ed.
+  if (!TII->isTriviallyReMaterializable(&MI, AA)) {
+    if (!isLoadFromConstantMemory(&MI))
+      return false;
+  }
 
   // If result(s) of this instruction is used by PHIs, then don't hoist it.
   // The presence of joins makes it difficult for current register allocator
@@ -368,18 +397,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
   // If not, we may be able to unfold a load and hoist that.
   // First test whether the instruction is loading from an amenable
   // memory location.
-  if (!MI->getDesc().mayLoad()) return 0;
-  if (!MI->hasOneMemOperand()) return 0;
-  MachineMemOperand *MMO = *MI->memoperands_begin();
-  if (MMO->isVolatile()) return 0;
-  MachineFunction &MF = *MI->getParent()->getParent();
-  if (!MMO->getValue()) return 0;
-  if (const PseudoSourceValue *PSV =
-        dyn_cast<PseudoSourceValue>(MMO->getValue())) {
-    if (!PSV->isConstant(MF.getFrameInfo())) return 0;
-  } else {
-    if (!AA->pointsToConstantMemory(MMO->getValue())) return 0;
-  }
+  if (!isLoadFromConstantMemory(MI))
+    return 0;
+
   // Next determine the register class for a temporary register.
   unsigned LoadRegIndex;
   unsigned NewOpc =
@@ -393,6 +413,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
   const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
   // Ok, we're unfolding. Create a temporary register and do the unfold.
   unsigned Reg = RegInfo->createVirtualRegister(RC);
+
+  MachineFunction &MF = *MI->getParent()->getParent();
   SmallVector<MachineInstr *, 2> NewMIs;
   bool Success =
     TII->unfoldMemoryOperand(MF, MI, Reg,
@@ -487,10 +509,10 @@ void MachineLICM::Hoist(MachineInstr *MI) {
       errs() << "Hoisting " << *MI;
       if (CurPreheader->getBasicBlock())
         errs() << " to MachineBasicBlock "
-               << CurPreheader->getBasicBlock()->getName();
+               << CurPreheader->getName();
       if (MI->getParent()->getBasicBlock())
         errs() << " from MachineBasicBlock "
-               << MI->getParent()->getBasicBlock()->getName();
+               << MI->getParent()->getName();
       errs() << "\n";
     });
 
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 4b067a0aa98bf..ed5bb5e5410c3 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -293,75 +293,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
   return 0;
 }
 
-//===----------------------------------------------------------------------===//
-/// DebugLabelFolding pass - This pass prunes out redundant labels.  This allows
-/// a info consumer to determine if the range of two labels is empty, by seeing
-/// if the labels map to the same reduced label.
-
-namespace llvm {
-
-struct DebugLabelFolder : public MachineFunctionPass {
-  static char ID;
-  DebugLabelFolder() : MachineFunctionPass(&ID) {}
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.setPreservesCFG();
-    AU.addPreservedID(MachineLoopInfoID);
-    AU.addPreservedID(MachineDominatorsID);
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-  virtual const char *getPassName() const { return "Label Folder"; }
-};
-
-char DebugLabelFolder::ID = 0;
-
-bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) {
-  // Get machine module info.
-  MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  if (!MMI) return false;
-
-  // Track if change is made.
-  bool MadeChange = false;
-  // No prior label to begin.
-  unsigned PriorLabel = 0;
-
-  // Iterate through basic blocks.
-  for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
-       BB != E; ++BB) {
-    // Iterate through instructions.
-    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
-      // Is it a label.
-      if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){
-        // The label ID # is always operand #0, an immediate.
-        unsigned NextLabel = I->getOperand(0).getImm();
-
-        // If there was an immediate prior label.
-        if (PriorLabel) {
-          // Remap the current label to prior label.
-          MMI->RemapLabel(NextLabel, PriorLabel);
-          // Delete the current label.
-          I = BB->erase(I);
-          // Indicate a change has been made.
-          MadeChange = true;
-          continue;
-        } else {
-          // Start a new round.
-          PriorLabel = NextLabel;
-        }
-       } else {
-        // No consecutive labels.
-        PriorLabel = 0;
-      }
-
-      ++I;
-    }
-  }
-
-  return MadeChange;
-}
-
-FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); }
-
-}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index a1c74c0c48a4c..d9f4c997b905d 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -305,7 +305,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
 void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
   assert(MBB);
   report(msg, MBB->getParent());
-  *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr()
+  *OS << "- basic block: " << MBB->getName()
       << " " << (void*)MBB
       << " (BB#" << MBB->getNumber() << ")\n";
 }
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b3802ed6725d0..2e30cc6abd328 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -353,7 +353,7 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
       // We break edges when registers are live out from the predecessor block
       // (not considering PHI nodes). If the register is live in to this block
       // anyway, we would gain nothing from splitting.
-      if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV))
+      if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV))
         SplitCriticalEdge(PreMBB, &MBB);
     }
   }
@@ -406,22 +406,6 @@ bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
   return false;
 }
 
-bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
-                                    LiveVariables &LV) {
-  LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
-
-  if (VI.AliveBlocks.test(MBB.getNumber()))
-    return true;
-
-  // defined in MBB?
-  const MachineInstr *Def = MRI->getVRegDef(Reg);
-  if (Def && Def->getParent() == &MBB)
-    return false;
-
-  // killed in MBB?
-  return VI.findKill(&MBB);
-}
-
 MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
                                                      MachineBasicBlock *B) {
   assert(A && B && "Missing MBB end point");
@@ -439,21 +423,21 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
   ++NumSplits;
 
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
-  MF->push_back(NMBB);
+  MF->insert(next(MachineFunction::iterator(A)), NMBB);
   DEBUG(errs() << "PHIElimination splitting critical edge:"
         " BB#" << A->getNumber()
         << " -- BB#" << NMBB->getNumber()
         << " -- BB#" << B->getNumber() << '\n');
 
   A->ReplaceUsesOfBlockWith(B, NMBB);
-  // If A may fall through to B, we may have to insert a branch.
-  if (A->isLayoutSuccessor(B))
-    A->updateTerminator();
+  A->updateTerminator();
 
-  // Insert unconditional "jump B" instruction in NMBB.
+  // Insert unconditional "jump B" instruction in NMBB if necessary.
   NMBB->addSuccessor(B);
-  Cond.clear();
-  MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+  if (!NMBB->isLayoutSuccessor(B)) {
+    Cond.clear();
+    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+  }
 
   // Fix PHI nodes in B so they refer to NMBB instead of A
   for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
@@ -463,7 +447,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
         i->getOperand(ni+1).setMBB(NMBB);
 
   if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
-    LV->addNewBlock(NMBB, A);
+    LV->addNewBlock(NMBB, A, B);
 
   if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
     MDT->addNewBlock(NMBB, A);
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index f8c9fe7284570..f5872cbe8d548 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -99,12 +99,6 @@ namespace llvm {
     bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
                    LiveVariables &LV);
 
-    /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI
-    /// source registers. This means that Reg is either killed by MBB or passes
-    /// through it.
-    bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
-                  LiveVariables &LV);
-
     /// SplitCriticalEdge - Split a critical edge from A to B by
     /// inserting a new MBB. Update branches in A and PHI instructions
     /// in B. Return the new block.
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 5f1f1f3580c1b..9101fce27a6f0 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -175,11 +175,10 @@ namespace {
     void FixupKills(MachineBasicBlock *MBB);
 
   private:
-    void ReleaseSucc(SUnit *SU, SDep *SuccEdge, bool IgnoreAntiDep);
-    void ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep);
-    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, bool IgnoreAntiDep);
-    void ListScheduleTopDown(
-           AntiDepBreaker::CandidateMap *AntiDepCandidates);
+    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+    void ReleaseSuccessors(SUnit *SU);
+    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+    void ListScheduleTopDown();
     void StartBlockForKills(MachineBasicBlock *BB);
     
     // ToggleKillFlag - Toggle a register operand kill flag. Other
@@ -322,50 +321,24 @@ void SchedulePostRATDList::Schedule() {
   BuildSchedGraph(AA);
 
   if (AntiDepBreak != NULL) {
-    AntiDepBreaker::CandidateMap AntiDepCandidates;
-    const bool NeedCandidates = AntiDepBreak->NeedCandidates();
+    unsigned Broken = 
+      AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
+                                          InsertPosIndex);
     
-    for (unsigned i = 0, Trials = AntiDepBreak->GetMaxTrials();
-         i < Trials; ++i) {
-      DEBUG(errs() << "\n********** Break Anti-Deps, Trial " << 
-            i << " **********\n");
-      
-      // If candidates are required, then schedule forward ignoring
-      // anti-dependencies to collect the candidate operands for
-      // anti-dependence breaking. The candidates will be the def
-      // operands for the anti-dependencies that if broken would allow
-      // an improved schedule
-      if (NeedCandidates) {
-        DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
-                SUnits[su].dumpAll(this));
-
-        AntiDepCandidates.clear();
-        AvailableQueue.initNodes(SUnits);
-        ListScheduleTopDown(&AntiDepCandidates);
-        AvailableQueue.releaseState();
-      }
-
-      unsigned Broken = 
-        AntiDepBreak->BreakAntiDependencies(SUnits, AntiDepCandidates,
-                                            Begin, InsertPos, InsertPosIndex);
-
+    if (Broken != 0) {
       // We made changes. Update the dependency graph.
       // Theoretically we could update the graph in place:
       // When a live range is changed to use a different register, remove
       // the def's anti-dependence *and* output-dependence edges due to
       // that register, and add new anti-dependence and output-dependence
       // edges based on the next live range of the register.
-      if ((Broken != 0) || NeedCandidates) {
-        SUnits.clear();
-        Sequence.clear();
-        EntrySU = SUnit();
-        ExitSU = SUnit();
-        BuildSchedGraph(AA);
-      }
-
+      SUnits.clear();
+      Sequence.clear();
+      EntrySU = SUnit();
+      ExitSU = SUnit();
+      BuildSchedGraph(AA);
+      
       NumFixedAnti += Broken;
-      if (Broken == 0)
-        break;
     }
   }
 
@@ -374,7 +347,7 @@ void SchedulePostRATDList::Schedule() {
           SUnits[su].dumpAll(this));
 
   AvailableQueue.initNodes(SUnits);
-  ListScheduleTopDown(NULL);
+  ListScheduleTopDown();
   AvailableQueue.releaseState();
 }
 
@@ -573,8 +546,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
 
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
 /// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
-                                       bool IgnoreAntiDep) {
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
   SUnit *SuccSU = SuccEdge->getSUnit();
 
 #ifndef NDEBUG
@@ -590,8 +562,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
   // Compute how many cycles it will be before this actually becomes
   // available.  This is the max of the start time of all predecessors plus
   // their latencies.
-  SuccSU->setDepthToAtLeast(SU->getDepth(IgnoreAntiDep) +
-                            SuccEdge->getLatency(), IgnoreAntiDep);
+  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
   
   // If all the node's predecessors are scheduled, this node is ready
   // to be scheduled. Ignore the special ExitSU node.
@@ -600,40 +571,34 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
 }
 
 /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
-void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) {
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
-    if (IgnoreAntiDep && 
-        ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
-      continue;
-    ReleaseSucc(SU, &*I, IgnoreAntiDep);
+    ReleaseSucc(SU, &*I);
   }
 }
 
 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
-void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle,
-                                               bool IgnoreAntiDep) {
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
   DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
   
   Sequence.push_back(SU);
-  assert(CurCycle >= SU->getDepth(IgnoreAntiDep) && 
+  assert(CurCycle >= SU->getDepth() && 
          "Node scheduled above its depth!");
-  SU->setDepthToAtLeast(CurCycle, IgnoreAntiDep);
+  SU->setDepthToAtLeast(CurCycle);
 
-  ReleaseSuccessors(SU, IgnoreAntiDep);
+  ReleaseSuccessors(SU);
   SU->isScheduled = true;
   AvailableQueue.ScheduledNode(SU);
 }
 
 /// ListScheduleTopDown - The main loop of list scheduling for top-down
 /// schedulers.
-void SchedulePostRATDList::ListScheduleTopDown(
-                   AntiDepBreaker::CandidateMap *AntiDepCandidates) {
+void SchedulePostRATDList::ListScheduleTopDown() {
   unsigned CurCycle = 0;
-  const bool IgnoreAntiDep = (AntiDepCandidates != NULL);
   
   // We're scheduling top-down but we're visiting the regions in
   // bottom-up order, so we don't know the hazards at the start of a
@@ -641,33 +606,13 @@ void SchedulePostRATDList::ListScheduleTopDown(
   // blocks are a single region).
   HazardRec->Reset();
 
-  // If ignoring anti-dependencies, the Schedule DAG still has Anti
-  // dep edges, but we ignore them for scheduling purposes
-  AvailableQueue.setIgnoreAntiDep(IgnoreAntiDep);
-
   // Release any successors of the special Entry node.
-  ReleaseSuccessors(&EntrySU, IgnoreAntiDep);
+  ReleaseSuccessors(&EntrySU);
 
-  // Add all leaves to Available queue. If ignoring antideps we also
-  // adjust the predecessor count for each node to not include antidep
-  // edges.
+  // Add all leaves to Available queue.
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
     // It is available if it has no predecessors.
     bool available = SUnits[i].Preds.empty();
-    // If we are ignoring anti-dependencies then a node that has only
-    // anti-dep predecessors is available.
-    if (!available && IgnoreAntiDep) {
-      available = true;
-      for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(),
-             E = SUnits[i].Preds.end(); I != E; ++I) {
-        if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output))  {
-          available = false;
-        } else {
-          SUnits[i].NumPredsLeft -= 1;
-        }
-      }
-    }
-
     if (available) {
       AvailableQueue.push(&SUnits[i]);
       SUnits[i].isAvailable = true;
@@ -687,21 +632,21 @@ void SchedulePostRATDList::ListScheduleTopDown(
     // so, add them to the available queue.
     unsigned MinDepth = ~0u;
     for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
-      if (PendingQueue[i]->getDepth(IgnoreAntiDep) <= CurCycle) {
+      if (PendingQueue[i]->getDepth() <= CurCycle) {
         AvailableQueue.push(PendingQueue[i]);
         PendingQueue[i]->isAvailable = true;
         PendingQueue[i] = PendingQueue.back();
         PendingQueue.pop_back();
         --i; --e;
-      } else if (PendingQueue[i]->getDepth(IgnoreAntiDep) < MinDepth)
-        MinDepth = PendingQueue[i]->getDepth(IgnoreAntiDep);
+      } else if (PendingQueue[i]->getDepth() < MinDepth)
+        MinDepth = PendingQueue[i]->getDepth();
     }
 
     DEBUG(errs() << "\n*** Examining Available\n";
           LatencyPriorityQueue q = AvailableQueue;
           while (!q.empty()) {
             SUnit *su = q.pop();
-            errs() << "Height " << su->getHeight(IgnoreAntiDep) << ": ";
+            errs() << "Height " << su->getHeight() << ": ";
             su->dump(this);
           });
 
@@ -731,30 +676,8 @@ void SchedulePostRATDList::ListScheduleTopDown(
 
     // If we found a node to schedule...
     if (FoundSUnit) {
-      // If we are ignoring anti-dependencies and the SUnit we are
-      // scheduling has an antidep predecessor that has not been
-      // scheduled, then we will need to break that antidep if we want
-      // to get this schedule when not ignoring anti-dependencies.
-      if (IgnoreAntiDep) {
-        AntiDepBreaker::AntiDepRegVector AntiDepRegs;
-        for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(),
-               E = FoundSUnit->Preds.end(); I != E; ++I) {
-          if (((I->getKind() == SDep::Anti) || 
-               (I->getKind() == SDep::Output)) &&
-              !I->getSUnit()->isScheduled)
-            AntiDepRegs.push_back(I->getReg());
-        }
-        
-        if (AntiDepRegs.size() > 0) {
-          DEBUG(errs() << "*** AntiDep Candidate: ");
-          DEBUG(FoundSUnit->dump(this));
-          AntiDepCandidates->insert(
-            AntiDepBreaker::CandidateMap::value_type(FoundSUnit, AntiDepRegs));
-        }
-      }
-
       // ... schedule the node...
-      ScheduleNodeTopDown(FoundSUnit, CurCycle, IgnoreAntiDep);
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
       HazardRec->EmitInstruction(FoundSUnit);
       CycleHasInsts = true;
 
@@ -775,8 +698,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
         // just advance the current cycle and try again.
         DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
         HazardRec->AdvanceCycle();
-        if (!IgnoreAntiDep)
-          ++NumStalls;
+        ++NumStalls;
       } else {
         // Otherwise, we have no instructions to issue and we have instructions
         // that will fault if we don't do this right.  This is the case for
@@ -784,8 +706,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
         DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
         HazardRec->EmitNoop();
         Sequence.push_back(0);   // NULL here means noop
-        if (!IgnoreAntiDep)
-          ++NumNoops;
+        ++NumNoops;
       }
 
       ++CurCycle;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 455964b5c5ad4..c9a33d8851540 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -75,10 +75,11 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
 
   SmallSet<unsigned, 8> ImpDefRegs;
   SmallVector<MachineInstr*, 8> ImpDefMIs;
-  MachineBasicBlock *Entry = fn.begin();
+  SmallVector<MachineInstr*, 4> RUses;
   SmallPtrSet<MachineBasicBlock*,16> Visited;
   SmallPtrSet<MachineInstr*, 8> ModInsts;
 
+  MachineBasicBlock *Entry = fn.begin();
   for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
          DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
        DFI != E; ++DFI) {
@@ -182,53 +183,87 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
       // is not an implicit_def, do not insert implicit_def's before the
       // uses.
       bool Skip = false;
+      SmallVector<MachineInstr*, 4> DeadImpDefs;
       for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
              DE = mri_->def_end(); DI != DE; ++DI) {
-        if (DI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
+        MachineInstr *DeadImpDef = &*DI;
+        if (DeadImpDef->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) {
           Skip = true;
           break;
         }
+        DeadImpDefs.push_back(DeadImpDef);
       }
       if (Skip)
         continue;
 
       // The only implicit_def which we want to keep are those that are live
       // out of its block.
-      MI->eraseFromParent();
+      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+        DeadImpDefs[j]->eraseFromParent();
       Changed = true;
 
+      // Process each use instruction once.
       for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
-             UE = mri_->use_end(); UI != UE; ) {
-        MachineOperand &RMO = UI.getOperand();
+             UE = mri_->use_end(); UI != UE; ++UI) {
         MachineInstr *RMI = &*UI;
-        ++UI;
-        if (ModInsts.count(RMI))
-          continue;
         MachineBasicBlock *RMBB = RMI->getParent();
         if (RMBB == MBB)
           continue;
+        if (ModInsts.insert(RMI))
+          RUses.push_back(RMI);
+      }
+
+      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+        MachineInstr *RMI = RUses[i];
 
         // Turn a copy use into an implicit_def.
         unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
         if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
             Reg == SrcReg) {
-          if (RMO.isKill()) {
+          RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+
+          bool isKill = false;
+          SmallVector<unsigned, 4> Ops;
+          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+            MachineOperand &RRMO = RMI->getOperand(j);
+            if (RRMO.isReg() && RRMO.getReg() == Reg) {
+              Ops.push_back(j);
+              if (RRMO.isKill())
+                isKill = true;
+            }
+          }
+          // Leave the other operands alone.
+          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+            unsigned OpIdx = Ops[j];
+            RMI->RemoveOperand(OpIdx-j);
+          }
+
+          // Update LiveVariables varinfo if the instruction is a kill.
+          if (isKill) {
             LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
             vi.removeKill(RMI);
           }
-          RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
-          for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
-            RMI->RemoveOperand(j);
-          ModInsts.insert(RMI);
           continue;
         }
 
+        // Replace Reg with a new vreg that's marked implicit.
         const TargetRegisterClass* RC = mri_->getRegClass(Reg);
         unsigned NewVReg = mri_->createVirtualRegister(RC);
-        RMO.setReg(NewVReg);
-        RMO.setIsUndef();
-        RMO.setIsKill();
+        bool isKill = true;
+        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+          MachineOperand &RRMO = RMI->getOperand(j);
+          if (RRMO.isReg() && RRMO.getReg() == Reg) {
+            RRMO.setReg(NewVReg);
+            RRMO.setIsUndef();
+            if (isKill) {
+              // Only the first operand of NewVReg is marked kill.
+              RRMO.setIsKill();
+              isKill = false;
+            }
+          }
+        }
       }
+      RUses.clear();
     }
     ModInsts.clear();
     ImpDefRegs.clear();
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index fff50da947c11..4ff512932f8e0 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -64,9 +64,31 @@ linearscanRegAlloc("linearscan", "linear scan register allocator",
                    createLinearScanRegisterAllocator);
 
 namespace {
+  // When we allocate a register, add it to a fixed-size queue of
+  // registers to skip in subsequent allocations. This trades a small
+  // amount of register pressure and increased spills for flexibility in
+  // the post-pass scheduler.
+  //
+  // Note that the number of registers used for reloading spills
+  // will be one greater than the value of this option.
+  //
+  // One big limitation of this is that it doesn't differentiate between
+  // different register classes. So on x86-64, if there is xmm register
+  // pressure, it can cause fewer GPRs to be held in the queue.
+  static cl::opt<unsigned>
+  NumRecentlyUsedRegs("linearscan-skip-count",
+                      cl::desc("Number of registers for linearscan to remember to skip."),
+                      cl::init(0),
+                      cl::Hidden);
+ 
   struct RALinScan : public MachineFunctionPass {
     static char ID;
-    RALinScan() : MachineFunctionPass(&ID) {}
+    RALinScan() : MachineFunctionPass(&ID) {
+      // Pre-fill the recently-used queue with NOREG (0) slots; it stays
+      // empty when no registers are to be skipped.
+      if (NumRecentlyUsedRegs != 0) RecentRegs.resize(NumRecentlyUsedRegs, 0);
+      RecentNext = RecentRegs.begin();
+    }
 
     typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
     typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
@@ -132,6 +154,20 @@ namespace {
 
     std::auto_ptr<Spiller> spiller_;
 
+    // The queue of recently-used registers.
+    SmallVector<unsigned, 4> RecentRegs;
+    SmallVector<unsigned, 4>::iterator RecentNext;
+
+    // Store reg in the next slot of RecentRegs, wrapping around at the end.
+    void recordRecentlyUsed(unsigned reg) {
+      assert(reg != 0 && "Recently used register is NOREG!");
+      if (RecentRegs.empty()) return;  // Queue has no slots; nothing to do.
+      *RecentNext = reg;
+      ++RecentNext;
+      if (RecentNext == RecentRegs.end())
+        RecentNext = RecentRegs.begin();
+    }
+
   public:
     virtual const char* getPassName() const {
       return "Linear Scan Register Allocator";
@@ -161,6 +197,12 @@ namespace {
     /// runOnMachineFunction - register allocate the whole function
     bool runOnMachineFunction(MachineFunction&);
 
+    // Return true if reg currently occupies a slot in RecentRegs.
+    bool isRecentlyUsed(unsigned reg) const {
+      SmallVector<unsigned, 4>::const_iterator End = RecentRegs.end();
+      return std::find(RecentRegs.begin(), End, reg) != End;
+    }
+
   private:
     /// linearScan - the linear scan algorithm
     void linearScan();
@@ -436,7 +478,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
   vrm_ = &getAnalysis<VirtRegMap>();
   if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
   
-  spiller_.reset(createSpiller(mf_, li_, ls_, loopInfo, vrm_));
+  spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_));
   
   initIntervalSets();
 
@@ -833,9 +875,15 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
 
 namespace {
   struct WeightCompare {
+  private:
+    const RALinScan &Allocator;  // Queried via isRecentlyUsed().
+    // LHS sorts before RHS only if it is lighter and not recently used.
+  public:
+    explicit WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
+    // NOTE(review): verify operator() is a strict weak ordering for std::sort.
     typedef std::pair<unsigned, float> RegWeightPair;
     bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
-      return LHS.second < RHS.second;
+      return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
     }
   };
 }
@@ -1079,7 +1127,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
            e = RC->allocation_order_end(*mf_); i != e; ++i) {
       unsigned reg = *i;
       float regWeight = SpillWeights[reg];
-      if (minWeight > regWeight)
+      // Skip recently allocated registers.
+      if (minWeight > regWeight && !isRecentlyUsed(reg))
         Found = true;
       RegsWeights.push_back(std::make_pair(reg, regWeight));
     }
@@ -1097,7 +1146,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   }
 
   // Sort all potential spill candidates by weight.
-  std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare());
+  std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
   minReg = RegsWeights[0].first;
   minWeight = RegsWeights[0].second;
   if (minWeight == HUGE_VALF) {
@@ -1360,7 +1409,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
     // Ignore "downgraded" registers.
     if (SkipDGRegs && DowngradedRegs.count(Reg))
       continue;
-    if (isRegAvail(Reg)) {
+    // Skip recently allocated registers.
+    if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
       FreeReg = Reg;
       if (FreeReg < inactiveCounts.size())
         FreeRegInactiveCount = inactiveCounts[FreeReg];
@@ -1372,9 +1422,12 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
 
   // If there are no free regs, or if this reg has the max inactive count,
   // return this register.
-  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
+  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+    // Remember what register we picked so we can skip it next time.
+    if (FreeReg != 0) recordRecentlyUsed(FreeReg);
     return FreeReg;
- 
+  }
+
   // Continue scanning the registers, looking for the one with the highest
   // inactive count.  Alkis found that this reduced register pressure very
   // slightly on X86 (in rev 1.94 of this file), though this should probably be
@@ -1385,7 +1438,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
     if (SkipDGRegs && DowngradedRegs.count(Reg))
       continue;
     if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
-        FreeRegInactiveCount < inactiveCounts[Reg]) {
+        FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
       FreeReg = Reg;
       FreeRegInactiveCount = inactiveCounts[Reg];
       if (FreeRegInactiveCount == MaxInactiveCount)
@@ -1393,6 +1446,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
     }
   }
 
+  // Remember what register we picked so we can skip it next time.
+  recordRecentlyUsed(FreeReg);
+
   return FreeReg;
 }
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6b27db263b252..71693d21c6889 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -183,8 +183,8 @@ void SUnit::setHeightDirty() {
 /// setDepthToAtLeast - Update this node's successors to reflect the
 /// fact that this node's depth just increased.
 ///
-void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) {
-  if (NewDepth <= getDepth(IgnoreAntiDep))
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+  if (NewDepth <= getDepth())
     return;
   setDepthDirty();
   Depth = NewDepth;
@@ -194,8 +194,8 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) {
 /// setHeightToAtLeast - Update this node's predecessors to reflect the
 /// fact that this node's height just increased.
 ///
-void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) {
-  if (NewHeight <= getHeight(IgnoreAntiDep))
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+  if (NewHeight <= getHeight())
     return;
   setHeightDirty();
   Height = NewHeight;
@@ -204,7 +204,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) {
 
 /// ComputeDepth - Calculate the maximal path from the node to the exit.
 ///
-void SUnit::ComputeDepth(bool IgnoreAntiDep) {
+void SUnit::ComputeDepth() {
   SmallVector<SUnit*, 8> WorkList;
   WorkList.push_back(this);
   do {
@@ -214,10 +214,6 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
     unsigned MaxPredDepth = 0;
     for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
          E = Cur->Preds.end(); I != E; ++I) {
-      if (IgnoreAntiDep && 
-          ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) 
-        continue;
-
       SUnit *PredSU = I->getSUnit();
       if (PredSU->isDepthCurrent)
         MaxPredDepth = std::max(MaxPredDepth,
@@ -241,7 +237,7 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
 
 /// ComputeHeight - Calculate the maximal path from the node to the entry.
 ///
-void SUnit::ComputeHeight(bool IgnoreAntiDep) {
+void SUnit::ComputeHeight() {
   SmallVector<SUnit*, 8> WorkList;
   WorkList.push_back(this);
   do {
@@ -251,10 +247,6 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) {
     unsigned MaxSuccHeight = 0;
     for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
          E = Cur->Succs.end(); I != E; ++I) {
-      if (IgnoreAntiDep && 
-          ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) 
-        continue;
-
       SUnit *SuccSU = I->getSUnit();
       if (SuccSU->isHeightCurrent)
         MaxSuccHeight = std::max(MaxSuccHeight,
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4851d496bdbd3..027f6150e26b6 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -32,6 +32,9 @@ using namespace llvm;
 namespace llvm {
   template<>
   struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
     static std::string getGraphName(const ScheduleDAG *G) {
       return G->MF.getFunction()->getName();
     }
@@ -57,9 +60,7 @@ namespace llvm {
     }
     
 
-    static std::string getNodeLabel(const SUnit *Node,
-                                    const ScheduleDAG *Graph,
-                                    bool ShortNames);
+    std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
     static std::string getNodeAttributes(const SUnit *N,
                                          const ScheduleDAG *Graph) {
       return "shape=Mrecord";
@@ -73,8 +74,7 @@ namespace llvm {
 }
 
 std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
-                                                       const ScheduleDAG *G,
-                                                       bool ShortNames) {
+                                                       const ScheduleDAG *G) {
   return G->getGraphNodeLabel(SU);
 }
 
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index c766859ae9c87..80c7d7c9eb9ca 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG
   CallingConvLower.cpp
   DAGCombiner.cpp
   FastISel.cpp
+  FunctionLoweringInfo.cpp
   InstrEmitter.cpp
   LegalizeDAG.cpp
   LegalizeFloatTypes.cpp
@@ -15,7 +16,7 @@ add_llvm_library(LLVMSelectionDAG
   ScheduleDAGRRList.cpp
   ScheduleDAGSDNodes.cpp
   SelectionDAG.cpp
-  SelectionDAGBuild.cpp
+  SelectionDAGBuilder.cpp
   SelectionDAGISel.cpp
   SelectionDAGPrinter.cpp
   TargetLowering.cpp
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 7dbc136f3a62a..5eb9ca1ebe02f 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -54,7 +54,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "SelectionDAGBuild.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
 using namespace llvm;
 
 unsigned FastISel::getRegForValue(Value *V) {
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 0000000000000..e3b25c2a85cc7
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,355 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "FunctionLoweringInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// ComputeLinearIndex - Walk the aggregate type Ty, counting one slot per
+/// non-aggregate leaf, and return the count reached at the member selected
+/// by the index path [Indices, IndicesEnd). With null Indices the whole of
+/// Ty is counted. CurIndex is the running count carried into the walk.
+unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                                  const unsigned *Indices,
+                                  const unsigned *IndicesEnd,
+                                  unsigned CurIndex) {
+  // The index path is exhausted: CurIndex is the answer.
+  if (Indices && Indices == IndicesEnd)
+    return CurIndex;
+
+  // Struct: visit the fields in order, descending into the selected one.
+  if (const StructType *StructTy = dyn_cast<StructType>(Ty)) {
+    unsigned FieldNo = 0;
+    for (StructType::element_iterator Elt = StructTy->element_begin(),
+                                      EltEnd = StructTy->element_end();
+         Elt != EltEnd; ++Elt, ++FieldNo) {
+      if (Indices && *Indices == FieldNo)
+        return ComputeLinearIndex(TLI, *Elt, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, *Elt, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Array: all elements share one type, so only the position matters.
+  if (const ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
+    const Type *ElemTy = ArrayTy->getElementType();
+    for (unsigned Pos = 0, N = ArrayTy->getNumElements(); Pos != N; ++Pos) {
+      if (Indices && *Indices == Pos)
+        return ComputeLinearIndex(TLI, ElemTy, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(TLI, ElemTy, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Any other type occupies exactly one slot.
+  return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Flatten the LLVM IR type Ty into the EVTs of its
+/// non-aggregate members, appending one entry to ValueVTs per member in
+/// traversal order. Struct fields and array elements are expanded
+/// recursively; a void type contributes nothing.
+///
+/// If Offsets is non-null, each member's offset (StartingOffset plus its
+/// struct-layout or array-stride displacement) is appended alongside.
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                           SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<uint64_t> *Offsets,
+                           uint64_t StartingOffset) {
+  // Struct: each field sits at the offset the target's layout assigns it.
+  if (const StructType *StructTy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *Layout = TLI.getTargetData()->getStructLayout(StructTy);
+    unsigned FieldNo = 0;
+    for (StructType::element_iterator Elt = StructTy->element_begin(),
+                                      EltEnd = StructTy->element_end();
+         Elt != EltEnd; ++Elt, ++FieldNo)
+      ComputeValueVTs(TLI, *Elt, ValueVTs, Offsets,
+                      StartingOffset + Layout->getElementOffset(FieldNo));
+    return;
+  }
+  // Array: element Pos is displaced by Pos times the element's alloc size.
+  if (const ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
+    const Type *ElemTy = ArrayTy->getElementType();
+    const uint64_t Stride = TLI.getTargetData()->getTypeAllocSize(ElemTy);
+    for (unsigned Pos = 0, N = ArrayTy->getNumElements(); Pos != N; ++Pos)
+      ComputeValueVTs(TLI, ElemTy, ValueVTs, Offsets,
+                      StartingOffset + Pos * Stride);
+    return;
+  }
+  // A void type yields no values at all.
+  if (Ty == Type::getVoidTy(Ty->getContext()))
+    return;
+  // Leaf: record the type's EVT and, when requested, its offset.
+  ValueVTs.push_back(TLI.getValueType(Ty));
+  if (Offsets)
+    Offsets->push_back(StartingOffset);
+}
+
+/// isUsedOutsideOfDefiningBlock - Return true if I is a PHI node, or if any
+/// user of I is a PHI node or sits in a different basic block than I.
+static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
+  if (isa<PHINode>(I)) return true;
+  BasicBlock *DefBB = I->getParent();
+  Value::use_iterator U = I->use_begin(), UEnd = I->use_end();
+  for (; U != UEnd; ++U)
+    if (isa<PHINode>(*U) || cast<Instruction>(*U)->getParent() != DefBB)
+      return true;
+  return false;
+}
+
+/// isOnlyUsedInEntryBlock - Return true if every use of argument A is in the
+/// function's entry block and none of those uses is a switch. With FastISel
+/// enabled, a non-byval argument only qualifies when it has no uses at all.
+static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
+  // FastISel may split blocks, so with it enabled every argument that has
+  // uses is reported as not entry-block-only, which makes the caller create
+  // a virtual register for it. Byval arguments are excluded because
+  // fast-isel can't handle those in all cases; they get the scan below.
+  const bool ForceVReg = EnableFastISel && !A->hasByValAttr();
+  if (ForceVReg) return A->use_empty();
+  BasicBlock *EntryBB = A->getParent()->begin();
+  Value::use_iterator U = A->use_begin(), UEnd = A->use_end();
+  for (; U != UEnd; ++U)
+    if (isa<SwitchInst>(*U) || cast<Instruction>(*U)->getParent() != EntryBB)
+      return false;  // Used by a switch, or outside the entry block.
+  return true;
+}
+
+// Bind the TargetLowering; Fn, MF and RegInfo are assigned later by set().
+FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
+  : TLI(tli) {}
+
+void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
+                               bool EnableFastISel) {
+  Fn = &fn;  // Bind this object to the function being lowered.
+  MF = &mf;
+  RegInfo = &MF->getRegInfo();
+
+  // Create a vreg for each argument register that is not dead and is used
+  // outside of the entry block for the function.
+  for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+       AI != E; ++AI)
+    if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+      InitializeRegForValue(AI);
+
+  // Give each alloca in the entry block that has a constant array size a
+  // stack object in the frame and record it in StaticAllocaMap. BB is left
+  // pointing at the entry block so the next loop starts from there.
+  Function::iterator BB = Fn->begin(), EB = Fn->end();
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        const Type *Ty = AI->getAllocatedType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align =
+          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+                   AI->getAlignment());
+
+        TySize *= CUI->getZExtValue();   // Get total allocated size.
+        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+        StaticAllocaMap[AI] =
+          MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
+      }
+
+  for (; BB != EB; ++BB)  // Values used outside their block get registers.
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
+        if (!isa<AllocaInst>(I) ||  // Static allocas need no register.
+            !StaticAllocaMap.count(cast<AllocaInst>(I)))
+          InitializeRegForValue(I);
+
+  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
+  // also creates the initial PHI MachineInstrs, though none of the input
+  // operands are populated.
+  for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
+    MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+    MBBMap[BB] = MBB;
+    MF->push_back(MBB);
+
+    // Transfer the address-taken flag. This is necessary because there could
+    // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+    // the first one should be marked.
+    if (BB->hasAddressTaken())
+      MBB->setHasAddressTaken();
+
+    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+    // appropriate.
+    PHINode *PN;
+    DebugLoc DL;
+    for (BasicBlock::iterator
+           I = BB->begin(), E = BB->end(); I != E; ++I) {
+
+      PN = dyn_cast<PHINode>(I);
+      if (!PN || PN->use_empty()) continue;  // Skip non-PHIs and dead PHIs.
+
+      unsigned PHIReg = ValueMap[PN];
+      assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+        const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+        for (unsigned i = 0; i != NumRegisters; ++i)
+          BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
+        PHIReg += NumRegisters;  // Step to this PHI's next value.
+      }
+    }
+  }
+}
+
+/// clear - Empty every per-function table so this FunctionLoweringInfo can
+/// be reused for another function. The two CatchInfo sets are only touched
+/// when NDEBUG is not defined. Fn, MF and RegInfo are left as they are.
+void FunctionLoweringInfo::clear() {
+#ifndef NDEBUG
+  CatchInfoFound.clear();
+  CatchInfoLost.clear();
+#endif
+  LiveOutRegInfo.clear();
+  StaticAllocaMap.clear();
+  ValueMap.clear();
+  MBBMap.clear();
+}
+
+unsigned FunctionLoweringInfo::MakeReg(EVT VT) {  // New vreg in VT's class.
+  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegForValue - Create the virtual registers that hold V. V's type
+/// is flattened with ComputeValueVTs, so a struct or array value gets
+/// registers for each of its members; every member receives as many
+/// registers of the target's register type as the target asks for.
+///
+/// Returns the first register created, or 0 if none was. The registers are
+/// assumed to be numbered consecutively starting from that one.
+unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+  SmallVector<EVT, 4> MemberVTs;
+  ComputeValueVTs(TLI, V->getType(), MemberVTs);
+
+  unsigned FirstReg = 0;
+  for (unsigned Idx = 0, NumVTs = MemberVTs.size(); Idx != NumVTs; ++Idx) {
+    EVT MemberVT = MemberVTs[Idx];
+    EVT RegVT = TLI.getRegisterType(V->getContext(), MemberVT);
+    unsigned Remaining = TLI.getNumRegisters(V->getContext(), MemberVT);
+    // Remember only the very first register that gets created.
+    for (; Remaining != 0; --Remaining) {
+      unsigned NewReg = MakeReg(RegVT);
+      if (FirstReg == 0) FirstReg = NewReg;
+    }
+  }
+  return FirstReg;
+}
+
+/// ExtractTypeInfo - Return the global behind V's pointer casts, or null.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+  Value *Stripped = V->stripPointerCasts();
+  GlobalVariable *TypeInfo = dyn_cast<GlobalVariable>(Stripped);
+  assert((TypeInfo || isa<ConstantPointerNull>(Stripped)) &&
+         "TypeInfo must be a global variable or NULL");
+  return TypeInfo;
+}
+
+/// AddCatchInfo - Read the personality (operand 2) and the catch, filter and
+/// cleanup clauses (operands 3 and up) of eh.selector call I; record on MBB.
+void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
+                        MachineBasicBlock *MBB) {
+  // Inform the MachineModuleInfo of the personality for this landing pad.
+  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+  assert(CE->getOpcode() == Instruction::BitCast &&
+         isa<Function>(CE->getOperand(0)) &&
+         "Personality should be a function");
+  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+  // Gather all the type infos for this landing pad and pass them along to
+  // MachineModuleInfo.
+  std::vector<GlobalVariable *> TyInfo;
+  unsigned N = I.getNumOperands();
+
+  for (unsigned i = N - 1; i > 2; --i) {  // Clause operands, last to first.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+      unsigned FilterLength = CI->getZExtValue();  // 0 means cleanup.
+      unsigned FirstCatch = i + FilterLength + !FilterLength;  // >= i + 1.
+      assert (FirstCatch <= N && "Invalid filter length");
+
+      if (FirstCatch < N) {  // Operands after the clause are catches.
+        TyInfo.reserve(N - FirstCatch);
+        for (unsigned j = FirstCatch; j < N; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addCatchTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      if (!FilterLength) {
+        // Cleanup.
+        MMI->addCleanup(MBB);
+      } else {
+        // Filter: the FilterLength - 1 operands after i are its type infos.
+        TyInfo.reserve(FilterLength - 1);
+        for (unsigned j = i + 1; j < FirstCatch; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+        MMI->addFilterTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      N = i;  // Everything from operand i on has been consumed.
+    }
+  }
+
+  if (N > 3) {  // Whatever remains from operand 3 up is catches.
+    TyInfo.reserve(N - 3);
+    for (unsigned j = 3; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                         MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+  BasicBlock::iterator Inst = SrcBB->begin(), Last = --SrcBB->end();
+  for (; Inst != Last; ++Inst) {  // The final instruction is not visited.
+    EHSelectorInst *Sel = dyn_cast<EHSelectorInst>(Inst);
+    if (!Sel) continue;
+    AddCatchInfo(*Sel, MMI, FLI.MBBMap[DestBB]);  // Attach to DestBB's MBB.
+#ifndef NDEBUG
+    if (!FLI.MBBMap[SrcBB]->isLandingPad()) FLI.CatchInfoFound.insert(Sel);
+#endif
+  }
+}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
new file mode 100644
index 0000000000000..d851e6429c0cf
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
@@ -0,0 +1,151 @@
+//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FUNCTIONLOWERINGINFO_H
+#define FUNCTIONLOWERINGINFO_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/ValueTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class Function;
+class GlobalVariable;
+class Instruction;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class TargetLowering;
+class Value;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+  TargetLowering &TLI;
+  Function *Fn;
+  MachineFunction *MF;
+  MachineRegisterInfo *RegInfo;
+
+  /// CanLowerReturn - true iff the function's return value can be lowered to
+  /// registers.
+  bool CanLowerReturn;
+
+  /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+  /// allocated to hold a pointer to the hidden sret parameter.
+  unsigned DemoteRegister;
+
+  explicit FunctionLoweringInfo(TargetLowering &TLI);
+
+  /// set - Initialize this FunctionLoweringInfo with the given Function
+  /// and its associated MachineFunction.
+  ///
+  void set(Function &Fn, MachineFunction &MF, bool EnableFastISel);
+
+  /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+  DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+  /// ValueMap - Since we emit code for the function a basic block at a time,
+  /// we must remember which virtual registers hold the values for
+  /// cross-basic-block values.
+  DenseMap<const Value*, unsigned> ValueMap;
+
+  /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+  /// the entry block.  This allows the allocas to be efficiently referenced
+  /// anywhere in the function.
+  DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+#ifndef NDEBUG // Debug-build-only instruction sets for catch-info tracking.
+  SmallSet<Instruction*, 8> CatchInfoLost;
+  SmallSet<Instruction*, 8> CatchInfoFound;
+#endif
+
+  unsigned MakeReg(EVT VT);
+  
+  /// isExportedInst - Return true if the specified value is an instruction
+  /// exported from its block.
+  bool isExportedInst(const Value *V) {
+    return ValueMap.count(V);
+  }
+
+  unsigned CreateRegForValue(const Value *V);
+  /// InitializeRegForValue - Create V's register and record it in ValueMap.
+  unsigned InitializeRegForValue(const Value *V) {
+    unsigned &R = ValueMap[V];
+    assert(R == 0 && "Already initialized this value register!");
+    return R = CreateRegForValue(V);
+  }
+  /// LiveOutInfo - Sign-bit count and known-bits masks for a live-out vreg.
+  struct LiveOutInfo {
+    unsigned NumSignBits;
+    APInt KnownOne, KnownZero;
+    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
+  };
+  
+  /// LiveOutRegInfo - Information about live out vregs, indexed by their
+  /// register number offset by 'FirstVirtualRegister'.
+  std::vector<LiveOutInfo> LiveOutRegInfo;
+
+  /// clear - Clear out all the function-specific state. This returns this
+  /// FunctionLoweringInfo to an empty state, ready to be used for a
+  /// different function.
+  void clear();
+};
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+                            const unsigned *Indices,
+                            const unsigned *IndicesEnd,
+                            unsigned CurIndex = 0);
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<uint64_t> *Offsets = 0,
+                     uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *ExtractTypeInfo(Value *V);
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB);
+
+/// CopyCatchInfo - Copy catch information from SrcBB to DestBB.
+void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
+                   MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 52b0832b06162..669d414cefa28 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -350,7 +350,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
     MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
                                             ES->getTargetFlags()));
   } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress()));
+    MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+                                            BA->getTargetFlags()));
   } else {
     assert(Op.getValueType() != MVT::Other &&
            Op.getValueType() != MVT::Flag &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 4f0a229a505e9..273dbf0d5611a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -158,7 +158,6 @@ private:
   SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
   SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
-  SDValue ExpandDBG_STOPPOINT(SDNode *Node);
   void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results);
   SDValue ExpandFCOPYSIGN(SDNode *Node);
@@ -1517,6 +1516,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   // Create the stack frame object.
   EVT VT = Node->getValueType(0);
   EVT OpVT = Node->getOperand(0).getValueType();
+  EVT EltVT = VT.getVectorElementType();
   DebugLoc dl = Node->getDebugLoc();
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
   int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1524,7 +1524,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
 
   // Emit a store of each element to the stack slot.
   SmallVector<SDValue, 8> Stores;
-  unsigned TypeByteSize = OpVT.getSizeInBits() / 8;
+  unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
   // Store (in the right endianness) the elements to memory.
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
     // Ignore undef elements.
@@ -1535,8 +1535,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
     SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
     Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
 
-    Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
-                                  Idx, SV, Offset));
+    // If EltVT smaller than OpVT, only store the bits necessary.
+    if (EltVT.bitsLT(OpVT))
+      Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+                          Node->getOperand(i), Idx, SV, Offset, EltVT));
+    else
+      Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, 
+                                    Node->getOperand(i), Idx, SV, Offset));
   }
 
   SDValue StoreChain;
@@ -1590,37 +1595,6 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
                      AbsVal);
 }
 
-SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
-  DebugLoc dl = Node->getDebugLoc();
-  DwarfWriter *DW = DAG.getDwarfWriter();
-  bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC,
-                                                    MVT::Other);
-  bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
-
-  const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
-  MDNode *CU_Node = DSP->getCompileUnit();
-  if (DW && (useDEBUG_LOC || useLABEL)) {
-
-    unsigned Line = DSP->getLine();
-    unsigned Col = DSP->getColumn();
-
-    if (OptLevel == CodeGenOpt::None) {
-      // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it
-      // won't hurt anything.
-      if (useDEBUG_LOC) {
-        return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
-                           DAG.getConstant(Line, MVT::i32),
-                           DAG.getConstant(Col, MVT::i32),
-                           DAG.getSrcValue(CU_Node));
-      } else {
-        unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node);
-        return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
-      }
-    }
-  }
-  return Node->getOperand(0);
-}
-
 void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
                                            SmallVectorImpl<SDValue> &Results) {
   unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
@@ -2269,16 +2243,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Results.push_back(DAG.getConstant(1, Node->getValueType(0)));
     break;
   case ISD::EH_RETURN:
-  case ISD::DBG_LABEL:
   case ISD::EH_LABEL:
   case ISD::PREFETCH:
   case ISD::MEMBARRIER:
   case ISD::VAEND:
     Results.push_back(Node->getOperand(0));
     break;
-  case ISD::DBG_STOPPOINT:
-    Results.push_back(ExpandDBG_STOPPOINT(Node));
-    break;
   case ISD::DYNAMIC_STACKALLOC:
     ExpandDYNAMIC_STACKALLOC(Node, Results);
     break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index c4bd552f52ab9..003cea7a6f43b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -64,8 +64,12 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
   // The final node obtained by mapping by ReplacedValues is not marked NewNode.
   // Note that ReplacedValues should be applied iteratively.
 
-  // Note that the ReplacedValues map may also map deleted nodes.  By iterating
-  // over the DAG we only consider non-deleted nodes.
+  // Note that the ReplacedValues map may also map deleted nodes (by iterating
+  // over the DAG we never dereference deleted nodes).  This means that it may
+  // also map nodes marked NewNode if the deallocated memory was reallocated as
+  // another node, and that new node was not seen by the LegalizeTypes machinery
+  // (for example because it was created but not used).  In general, we cannot
+  // distinguish between new nodes and deleted nodes.
   SmallVector<SDNode*, 16> NewNodes;
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
        E = DAG.allnodes_end(); I != E; ++I) {
@@ -114,7 +118,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
         Mapped |= 128;
 
       if (I->getNodeId() != Processed) {
-        if (Mapped != 0) {
+        // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+        // marked NewNode too, since a deleted node may have been reallocated as
+        // another node that has not been seen by the LegalizeTypes machinery.
+        if ((I->getNodeId() == NewNode && Mapped > 1) ||
+            (I->getNodeId() != NewNode && Mapped != 0)) {
           errs() << "Unprocessed value in a map!";
           Failed = true;
         }
@@ -320,16 +328,12 @@ ScanOperands:
         continue;
 
       // The node morphed - this is equivalent to legalizing by replacing every
-      // value of N with the corresponding value of M.  So do that now.  However
-      // there is no need to remember the replacement - morphing will make sure
-      // it is never used non-trivially.
+      // value of N with the corresponding value of M.  So do that now.
       assert(N->getNumValues() == M->getNumValues() &&
              "Node morphing changed the number of results!");
       for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
-        // Replacing the value takes care of remapping the new value.  Do the
-        // replacement without recording it in ReplacedValues.  This does not
-        // expunge From but that is fine - it is not really a new node.
-        ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i));
+        // Replacing the value takes care of remapping the new value.
+        ReplaceValueWith(SDValue(N, i), SDValue(M, i));
       assert(N->getNodeId() == NewNode && "Unexpected node state!");
       // The node continues to live on as part of the NewNode fungus that
       // grows on top of the useful nodes.  Nothing more needs to be done
@@ -666,14 +670,14 @@ namespace {
 }
 
 
-/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith.  Updates the
-/// DAG causing any uses of From to use To instead, but without expunging From
-/// or recording the replacement in ReplacedValues.  Do not call directly unless
-/// you really know what you are doing!
-void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value.  Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
   assert(From.getNode() != To.getNode() && "Potential legalization loop!");
 
   // If expansion produced new nodes, make sure they are properly marked.
+  ExpungeNode(From.getNode());
   AnalyzeNewValue(To); // Expunges To.
 
   // Anything that used the old node should now use the new one.  Note that this
@@ -682,6 +686,10 @@ void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
   NodeUpdateListener NUL(*this, NodesToAnalyze);
   DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
 
+  // The old node may still be present in a map like ExpandedIntegers or
+  // PromotedIntegers.  Inform maps about the replacement.
+  ReplacedValues[From] = To;
+
   // Process the list of nodes that need to be reanalyzed.
   while (!NodesToAnalyze.empty()) {
     SDNode *N = NodesToAnalyze.back();
@@ -712,25 +720,6 @@ void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) {
   }
 }
 
-/// ReplaceValueWith - The specified value was legalized to the specified other
-/// value.  Update the DAG and NodeIds replacing any uses of From to use To
-/// instead.
-void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
-  assert(From.getNode()->getNodeId() == ReadyToProcess &&
-         "Only the node being processed may be remapped!");
-
-  // If expansion produced new nodes, make sure they are properly marked.
-  ExpungeNode(From.getNode());
-  AnalyzeNewValue(To); // Expunges To.
-
-  // The old node may still be present in a map like ExpandedIntegers or
-  // PromotedIntegers.  Inform maps about the replacement.
-  ReplacedValues[From] = To;
-
-  // Do the replacement.
-  ReplaceValueWithHelper(From, To);
-}
-
 void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
   assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
          "Invalid type for promoted integer");
@@ -918,6 +907,29 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
   return true;
 }
 
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+  // Bail out unless the target marks this opcode/type pair as Custom.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  TLI.ReplaceNodeResults(N, Results, DAG);
+
+  if (Results.empty())
+    // The target declined to custom widen this node after all.
+    return false;
+
+  // Record each returned value as the widened form of the matching result.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    SetWidenedVector(SDValue(N, i), Results[i]);
+  return true;
+}
+
 /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
 /// which is split into two not necessarily identical pieces.
 void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e1b7022dda231..2ee9f8a42ed37 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -188,6 +188,7 @@ private:
   SDValue BitConvertVectorToIntegerVector(SDValue Op);
   SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
   bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+  bool CustomWidenLowerNode(SDNode *N, EVT VT);
   SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
   SDValue JoinIntegers(SDValue Lo, SDValue Hi);
   SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
@@ -196,7 +197,6 @@ private:
                       DebugLoc dl);
   SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
   void ReplaceValueWith(SDValue From, SDValue To);
-  void ReplaceValueWithHelper(SDValue From, SDValue To);
   void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
   void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
                     SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ca194305d9898..785c2adb39431 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -54,9 +54,6 @@ class VectorLegalizer {
   SDValue LegalizeOp(SDValue Op);
   // Assuming the node is legal, "legalize" the results
   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
-  // Implements unrolling a generic vector operation, i.e. turning it into
-  // scalar operations.
-  SDValue UnrollVectorOp(SDValue Op);
   // Implements unrolling a VSETCC.
   SDValue UnrollVSETCC(SDValue Op);
   // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
@@ -211,7 +208,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     else if (Node->getOpcode() == ISD::VSETCC)
       Result = UnrollVSETCC(Op);
     else
-      Result = UnrollVectorOp(Op);
+      Result = DAG.UnrollVectorOp(Op.getNode());
     break;
   }
 
@@ -256,7 +253,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
     return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
                        Zero, Op.getOperand(0));
   }
-  return UnrollVectorOp(Op);
+  return DAG.UnrollVectorOp(Op.getNode());
 }
 
 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
@@ -282,56 +279,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
 }
 
-/// UnrollVectorOp - We know that the given vector has a legal type, however
-/// the operation it performs is not legal, and the target has requested that
-/// the operation be expanded.  "Unroll" the vector, splitting out the scalars
-/// and operating on each element individually.
-SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) {
-  EVT VT = Op.getValueType();
-  assert(Op.getNode()->getNumValues() == 1 &&
-         "Can't unroll a vector with multiple results!");
-  unsigned NE = VT.getVectorNumElements();
-  EVT EltVT = VT.getVectorElementType();
-  DebugLoc dl = Op.getDebugLoc();
-
-  SmallVector<SDValue, 8> Scalars;
-  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
-  for (unsigned i = 0; i != NE; ++i) {
-    for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
-      SDValue Operand = Op.getOperand(j);
-      EVT OperandVT = Operand.getValueType();
-      if (OperandVT.isVector()) {
-        // A vector operand; extract a single element.
-        EVT OperandEltVT = OperandVT.getVectorElementType();
-        Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                                  OperandEltVT,
-                                  Operand,
-                                  DAG.getConstant(i, MVT::i32));
-      } else {
-        // A scalar operand; just use it as is.
-        Operands[j] = Operand;
-      }
-    }
-
-    switch (Op.getOpcode()) {
-    default:
-      Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT,
-                                    &Operands[0], Operands.size()));
-      break;
-    case ISD::SHL:
-    case ISD::SRA:
-    case ISD::SRL:
-    case ISD::ROTL:
-    case ISD::ROTR:
-      Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0],
-                                    DAG.getShiftAmountOperand(Operands[1])));
-      break;
-    }
-  }
-
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size());
-}
-
 }
 
 bool SelectionDAG::LegalizeVectors() {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 75e12395d8bd0..023324b82af36 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1118,8 +1118,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   DEBUG(errs() << "Widen node result " << ResNo << ": ";
         N->dump(&DAG);
         errs() << "\n");
-  SDValue Res = SDValue();
 
+  // See if the target wants to custom widen this node.
+  if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+    return;
+
+  SDValue Res = SDValue();
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4530ffc4a2d0d..c38c79b14597a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -200,19 +200,6 @@ bool ISD::isScalarToVector(const SDNode *N) {
   return true;
 }
 
-
-/// isDebugLabel - Return true if the specified node represents a debug
-/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node).
-bool ISD::isDebugLabel(const SDNode *N) {
-  SDValue Zero;
-  if (N->getOpcode() == ISD::DBG_LABEL)
-    return true;
-  if (N->isMachineOpcode() &&
-      N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL)
-    return true;
-  return false;
-}
-
 /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
 /// when given the operation for (X op Y).
 ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
@@ -393,13 +380,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   case ISD::Register:
     ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
     break;
-  case ISD::DBG_STOPPOINT: {
-    const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(N);
-    ID.AddInteger(DSP->getLine());
-    ID.AddInteger(DSP->getColumn());
-    ID.AddPointer(DSP->getCompileUnit());
-    break;
-  }
+
   case ISD::SRCVALUE:
     ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
     break;
@@ -462,7 +443,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   }
   case ISD::TargetBlockAddress:
   case ISD::BlockAddress: {
-    ID.AddPointer(cast<BlockAddressSDNode>(N));
+    ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+    ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
     break;
   }
   } // end switch (N->getOpcode())
@@ -508,8 +490,6 @@ static bool doNotCSE(SDNode *N) {
   switch (N->getOpcode()) {
   default: break;
   case ISD::HANDLENODE:
-  case ISD::DBG_LABEL:
-  case ISD::DBG_STOPPOINT:
   case ISD::EH_LABEL:
     return true;   // Never CSE these nodes.
   }
@@ -1296,16 +1276,6 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root,
-                                      unsigned Line, unsigned Col,
-                                      MDNode *CU) {
-  SDNode *N = NodeAllocator.Allocate<DbgStopPointSDNode>();
-  new (N) DbgStopPointSDNode(Root, Line, Col, CU);
-  N->setDebugLoc(DL);
-  AllNodes.push_back(N);
-  return SDValue(N, 0);
-}
-
 SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
                                SDValue Root,
                                unsigned LabelID) {
@@ -1323,18 +1293,20 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, DebugLoc DL,
-                                      bool isTarget) {
+SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
   unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
 
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opc, getVTList(TLI.getPointerTy()), 0, 0);
+  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddPointer(BA);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
-  new (N) BlockAddressSDNode(Opc, DL, TLI.getPointerTy(), BA);
+  new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -5452,7 +5424,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::UNDEF:         return "undef";
   case ISD::MERGE_VALUES:  return "merge_values";
   case ISD::INLINEASM:     return "inlineasm";
-  case ISD::DBG_LABEL:     return "dbg_label";
   case ISD::EH_LABEL:      return "eh_label";
   case ISD::HANDLENODE:    return "handlenode";
 
@@ -5586,10 +5557,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::CTTZ:    return "cttz";
   case ISD::CTLZ:    return "ctlz";
 
-  // Debug info
-  case ISD::DBG_STOPPOINT: return "dbg_stoppoint";
-  case ISD::DEBUG_LOC: return "debug_loc";
-
   // Trampolines
   case ISD::TRAMPOLINE: return "trampoline";
 
@@ -5810,6 +5777,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
     OS << ", ";
     WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
     OS << ">";
+    if (unsigned int TF = BA->getTargetFlags())
+      OS << " [TF=" << TF << ']';
   }
 }
 
@@ -5838,6 +5807,66 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
   N->dump(G);
 }
 
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+  assert(N->getNumValues() == 1 &&
+         "Can't unroll a vector with multiple results!");
+
+  EVT VT = N->getValueType(0);
+  unsigned NE = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+  // ResNE == 0 means fully unroll; otherwise unroll at most ResNE elements.
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  unsigned i;
+  for (i= 0; i != NE; ++i) {
+    for (unsigned j = 0; j != N->getNumOperands(); ++j) {
+      SDValue Operand = N->getOperand(j);
+      EVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        // A vector operand; extract element i as this lane's scalar input.
+        EVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                              OperandEltVT,
+                              Operand,
+                              getConstant(i, MVT::i32));
+      } else {
+        // A scalar operand; every lane uses it unchanged.
+        Operands[j] = Operand;
+      }
+    }
+
+    switch (N->getOpcode()) {
+    default:
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+                                &Operands[0], Operands.size()));
+      break;
+    case ISD::SHL:
+    case ISD::SRA:
+    case ISD::SRL:
+    case ISD::ROTL:
+    case ISD::ROTR:
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+                                getShiftAmountOperand(Operands[1])));
+      break;
+    }
+  }
+
+  for (; i < ResNE; ++i)  // Pad any remaining result elements with undef.
+    Scalars.push_back(getUNDEF(EltVT));
+
+  return getNode(ISD::BUILD_VECTOR, dl,
+                 EVT::getVectorVT(*getContext(), EltVT, ResNE),
+                 &Scalars[0], Scalars.size());
+}
+
 void SelectionDAG::dump() const {
   errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
 
@@ -5993,3 +6022,4 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
       return false;
   return true;
 }
+
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
deleted file mode 100644
index 90fd95eb6352e..0000000000000
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ /dev/null
@@ -1,6110 +0,0 @@
-//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements routines for translating from LLVM IR into SelectionDAG IR.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "isel"
-#include "SelectionDAGBuild.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Constants.h"
-#include "llvm/Constants.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/FastISel.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/DwarfWriter.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-using namespace llvm;
-
-/// LimitFloatPrecision - Generate low-precision inline sequences for
-/// some float libcalls (6, 8 or 12 bits).
-static unsigned LimitFloatPrecision;
-
-static cl::opt<unsigned, true>
-LimitFPPrecision("limit-float-precision",
-                 cl::desc("Generate low-precision inline sequences "
-                          "for some float libcalls"),
-                 cl::location(LimitFloatPrecision),
-                 cl::init(0));
-
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
-static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
-                                   const unsigned *Indices,
-                                   const unsigned *IndicesEnd,
-                                   unsigned CurIndex = 0) {
-  // Base case: We're done.
-  if (Indices && Indices == IndicesEnd)
-    return CurIndex;
-
-  // Given a struct type, recursively traverse the elements.
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-    for (StructType::element_iterator EB = STy->element_begin(),
-                                      EI = EB,
-                                      EE = STy->element_end();
-        EI != EE; ++EI) {
-      if (Indices && *Indices == unsigned(EI - EB))
-        return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
-    }
-    return CurIndex;
-  }
-  // Given an array type, recursively traverse the elements.
-  else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    const Type *EltTy = ATy->getElementType();
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
-      if (Indices && *Indices == i)
-        return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
-    }
-    return CurIndex;
-  }
-  // We haven't found the type we're looking for, so keep searching.
-  return CurIndex + 1;
-}
-
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
-                            SmallVectorImpl<EVT> &ValueVTs,
-                            SmallVectorImpl<uint64_t> *Offsets = 0,
-                            uint64_t StartingOffset = 0) {
-  // Given a struct type, recursively traverse the elements.
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
-    for (StructType::element_iterator EB = STy->element_begin(),
-                                      EI = EB,
-                                      EE = STy->element_end();
-         EI != EE; ++EI)
-      ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
-                      StartingOffset + SL->getElementOffset(EI - EB));
-    return;
-  }
-  // Given an array type, recursively traverse the elements.
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    const Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
-      ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
-                      StartingOffset + i * EltSize);
-    return;
-  }
-  // Interpret void as zero return values.
-  if (Ty == Type::getVoidTy(Ty->getContext()))
-    return;
-  // Base case: we can get an EVT for this LLVM IR type.
-  ValueVTs.push_back(TLI.getValueType(Ty));
-  if (Offsets)
-    Offsets->push_back(StartingOffset);
-}
-
-namespace llvm {
-  /// RegsForValue - This struct represents the registers (physical or virtual)
-  /// that a particular set of values is assigned, and the type information about
-  /// the value. The most common situation is to represent one value at a time,
-  /// but struct or array values are handled element-wise as multiple values.
-  /// The splitting of aggregates is performed recursively, so that we never
-  /// have aggregate-typed registers. The values at this point do not necessarily
-  /// have legal types, so each value may require one or more registers of some
-  /// legal type.
-  ///
-  struct VISIBILITY_HIDDEN RegsForValue {
-    /// TLI - The TargetLowering object.
-    ///
-    const TargetLowering *TLI;
-
-    /// ValueVTs - The value types of the values, which may not be legal, and
-    /// may need be promoted or synthesized from one or more registers.
-    ///
-    SmallVector<EVT, 4> ValueVTs;
-
-    /// RegVTs - The value types of the registers. This is the same size as
-    /// ValueVTs and it records, for each value, what the type of the assigned
-    /// register or registers are. (Individual values are never synthesized
-    /// from more than one type of register.)
-    ///
-    /// With virtual registers, the contents of RegVTs is redundant with TLI's
-    /// getRegisterType member function, however when with physical registers
-    /// it is necessary to have a separate record of the types.
-    ///
-    SmallVector<EVT, 4> RegVTs;
-
-    /// Regs - This list holds the registers assigned to the values.
-    /// Each legal or promoted value requires one register, and each
-    /// expanded value requires multiple registers.
-    ///
-    SmallVector<unsigned, 4> Regs;
-
-    RegsForValue() : TLI(0) {}
-
-    RegsForValue(const TargetLowering &tli,
-                 const SmallVector<unsigned, 4> &regs,
-                 EVT regvt, EVT valuevt)
-      : TLI(&tli),  ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
-    RegsForValue(const TargetLowering &tli,
-                 const SmallVector<unsigned, 4> &regs,
-                 const SmallVector<EVT, 4> &regvts,
-                 const SmallVector<EVT, 4> &valuevts)
-      : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
-                 unsigned Reg, const Type *Ty) : TLI(&tli) {
-      ComputeValueVTs(tli, Ty, ValueVTs);
-
-      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-        EVT ValueVT = ValueVTs[Value];
-        unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
-        EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
-        for (unsigned i = 0; i != NumRegs; ++i)
-          Regs.push_back(Reg + i);
-        RegVTs.push_back(RegisterVT);
-        Reg += NumRegs;
-      }
-    }
-
-    /// append - Add the specified values to this one.
-    void append(const RegsForValue &RHS) {
-      TLI = RHS.TLI;
-      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
-      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
-      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
-    }
-
-
-    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-    /// this value and returns the result as a ValueVTs value.  This uses
-    /// Chain/Flag as the input and updates them for the output Chain/Flag.
-    /// If the Flag pointer is NULL, no flag is used.
-    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
-                              SDValue &Chain, SDValue *Flag) const;
-
-    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-    /// specified value into the registers specified by this object.  This uses
-    /// Chain/Flag as the input and updates them for the output Chain/Flag.
-    /// If the Flag pointer is NULL, no flag is used.
-    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                       SDValue &Chain, SDValue *Flag) const;
-
-    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
-    /// operand list.  This adds the code marker, matching input operand index
-    /// (if applicable), and includes the number of values added into it.
-    void AddInlineAsmOperands(unsigned Code,
-                              bool HasMatching, unsigned MatchingIdx,
-                              SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
-  };
-}
-
-/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
-/// PHI nodes or outside of the basic block that defines it, or used by a
-/// switch or atomic instruction, which may expand to multiple basic blocks.
-static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
-  if (isa<PHINode>(I)) return true;
-  BasicBlock *BB = I->getParent();
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
-    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
-      return true;
-  return false;
-}
-
-/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
-/// entry block, return true.  This includes arguments used by switches, since
-/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) {
-  // With FastISel active, we may be splitting blocks, so force creation
-  // of virtual registers for all non-dead arguments.
-  // Don't force virtual registers for byval arguments though, because
-  // fast-isel can't handle those in all cases.
-  if (EnableFastISel && !A->hasByValAttr())
-    return A->use_empty();
-
-  BasicBlock *Entry = A->getParent()->begin();
-  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
-    if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
-      return false;  // Use not in entry block.
-  return true;
-}
-
-FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli)
-  : TLI(tli) {
-}
-
-void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
-                               SelectionDAG &DAG,
-                               bool EnableFastISel) {
-  Fn = &fn;
-  MF = &mf;
-  RegInfo = &MF->getRegInfo();
-
-  // Create a vreg for each argument register that is not dead and is used
-  // outside of the entry block for the function.
-  for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
-       AI != E; ++AI)
-    if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
-      InitializeRegForValue(AI);
-
-  // Initialize the mapping of values to registers.  This is only set up for
-  // instruction values that are used outside of the block that defines
-  // them.
-  Function::iterator BB = Fn->begin(), EB = Fn->end();
-  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
-      if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
-        const Type *Ty = AI->getAllocatedType();
-        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
-        unsigned Align =
-          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
-                   AI->getAlignment());
-
-        TySize *= CUI->getZExtValue();   // Get total allocated size.
-        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
-        StaticAllocaMap[AI] =
-          MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
-      }
-
-  for (; BB != EB; ++BB)
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
-        if (!isa<AllocaInst>(I) ||
-            !StaticAllocaMap.count(cast<AllocaInst>(I)))
-          InitializeRegForValue(I);
-
-  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
-  // also creates the initial PHI MachineInstrs, though none of the input
-  // operands are populated.
-  for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) {
-    MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
-    MBBMap[BB] = MBB;
-    MF->push_back(MBB);
-
-    // Transfer the address-taken flag. This is necessary because there could
-    // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
-    // the first one should be marked.
-    if (BB->hasAddressTaken())
-      MBB->setHasAddressTaken();
-
-    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
-    // appropriate.
-    PHINode *PN;
-    DebugLoc DL;
-    for (BasicBlock::iterator
-           I = BB->begin(), E = BB->end(); I != E; ++I) {
-
-      PN = dyn_cast<PHINode>(I);
-      if (!PN || PN->use_empty()) continue;
-
-      unsigned PHIReg = ValueMap[PN];
-      assert(PHIReg && "PHI node does not have an assigned virtual register!");
-
-      SmallVector<EVT, 4> ValueVTs;
-      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
-      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
-        EVT VT = ValueVTs[vti];
-        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
-        const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-        for (unsigned i = 0; i != NumRegisters; ++i)
-          BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i);
-        PHIReg += NumRegisters;
-      }
-    }
-  }
-}
-
-unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
-  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
-}
-
-/// CreateRegForValue - Allocate the appropriate number of virtual registers of
-/// the correctly promoted or expanded types.  Assign these registers
-/// consecutive vreg numbers and return the first assigned number.
-///
-/// In the case that the given value has struct or array type, this function
-/// will assign registers for each member or element.
-///
-unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
-  SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, V->getType(), ValueVTs);
-
-  unsigned FirstReg = 0;
-  for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
-    EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
-
-    unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      unsigned R = MakeReg(RegisterVT);
-      if (!FirstReg) FirstReg = R;
-    }
-  }
-  return FirstReg;
-}
-
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent.  If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
-                                const SDValue *Parts,
-                                unsigned NumParts, EVT PartVT, EVT ValueVT,
-                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
-  assert(NumParts > 0 && "No parts to assemble!");
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  SDValue Val = Parts[0];
-
-  if (NumParts > 1) {
-    // Assemble the value from multiple parts.
-    if (!ValueVT.isVector() && ValueVT.isInteger()) {
-      unsigned PartBits = PartVT.getSizeInBits();
-      unsigned ValueBits = ValueVT.getSizeInBits();
-
-      // Assemble the power of 2 part.
-      unsigned RoundParts = NumParts & (NumParts - 1) ?
-        1 << Log2_32(NumParts) : NumParts;
-      unsigned RoundBits = PartBits * RoundParts;
-      EVT RoundVT = RoundBits == ValueBits ?
-        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
-      SDValue Lo, Hi;
-
-      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
-
-      if (RoundParts > 2) {
-        Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
-        Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2,
-                              PartVT, HalfVT);
-      } else {
-        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
-        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
-      }
-      if (TLI.isBigEndian())
-        std::swap(Lo, Hi);
-      Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
-
-      if (RoundParts < NumParts) {
-        // Assemble the trailing non-power-of-2 part.
-        unsigned OddParts = NumParts - RoundParts;
-        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
-        Hi = getCopyFromParts(DAG, dl,
-                              Parts+RoundParts, OddParts, PartVT, OddVT);
-
-        // Combine the round and odd parts.
-        Lo = Val;
-        if (TLI.isBigEndian())
-          std::swap(Lo, Hi);
-        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-        Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
-        Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
-                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
-                                         TLI.getPointerTy()));
-        Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
-        Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
-      }
-    } else if (ValueVT.isVector()) {
-      // Handle a multi-element vector.
-      EVT IntermediateVT, RegisterVT;
-      unsigned NumIntermediates;
-      unsigned NumRegs =
-        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 
-                                   NumIntermediates, RegisterVT);
-      assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
-      NumParts = NumRegs; // Silence a compiler warning.
-      assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-      assert(RegisterVT == Parts[0].getValueType() &&
-             "Part type doesn't match part!");
-
-      // Assemble the parts into intermediate operands.
-      SmallVector<SDValue, 8> Ops(NumIntermediates);
-      if (NumIntermediates == NumParts) {
-        // If the register was not expanded, truncate or copy the value,
-        // as appropriate.
-        for (unsigned i = 0; i != NumParts; ++i)
-          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
-                                    PartVT, IntermediateVT);
-      } else if (NumParts > 0) {
-        // If the intermediate type was expanded, build the intermediate operands
-        // from the parts.
-        assert(NumParts % NumIntermediates == 0 &&
-               "Must expand into a divisible number of parts!");
-        unsigned Factor = NumParts / NumIntermediates;
-        for (unsigned i = 0; i != NumIntermediates; ++i)
-          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
-                                    PartVT, IntermediateVT);
-      }
-
-      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
-      // operands.
-      Val = DAG.getNode(IntermediateVT.isVector() ?
-                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
-                        ValueVT, &Ops[0], NumIntermediates);
-    } else if (PartVT.isFloatingPoint()) {
-      // FP split into multiple FP parts (for ppcf128)
-      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
-             "Unexpected split");
-      SDValue Lo, Hi;
-      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
-      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
-      if (TLI.isBigEndian())
-        std::swap(Lo, Hi);
-      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
-    } else {
-      // FP split into integer parts (soft fp)
-      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
-             !PartVT.isVector() && "Unexpected split");
-      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
-      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
-    }
-  }
-
-  // There is now one part, held in Val.  Correct it to match ValueVT.
-  PartVT = Val.getValueType();
-
-  if (PartVT == ValueVT)
-    return Val;
-
-  if (PartVT.isVector()) {
-    assert(ValueVT.isVector() && "Unknown vector conversion!");
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
-  }
-
-  if (ValueVT.isVector()) {
-    assert(ValueVT.getVectorElementType() == PartVT &&
-           ValueVT.getVectorNumElements() == 1 &&
-           "Only trivial scalar-to-vector conversions should get here!");
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
-  }
-
-  if (PartVT.isInteger() &&
-      ValueVT.isInteger()) {
-    if (ValueVT.bitsLT(PartVT)) {
-      // For a truncate, see if we have any information to
-      // indicate whether the truncated bits will always be
-      // zero or sign-extension.
-      if (AssertOp != ISD::DELETED_NODE)
-        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
-                          DAG.getValueType(ValueVT));
-      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-    } else {
-      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
-    }
-  }
-
-  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
-    if (ValueVT.bitsLT(Val.getValueType()))
-      // FP_ROUND's are always exact here.
-      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
-                         DAG.getIntPtrConstant(1));
-    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
-  }
-
-  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
-
-  llvm_unreachable("Unknown mismatch!");
-  return SDValue();
-}
-
-/// getCopyToParts - Create a series of nodes that contain the specified value
-/// split into legal parts.  If the parts contain more bits than Val, then, for
-/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
-                           SDValue *Parts, unsigned NumParts, EVT PartVT,
-                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT PtrVT = TLI.getPointerTy();
-  EVT ValueVT = Val.getValueType();
-  unsigned PartBits = PartVT.getSizeInBits();
-  unsigned OrigNumParts = NumParts;
-  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
-
-  if (!NumParts)
-    return;
-
-  if (!ValueVT.isVector()) {
-    if (PartVT == ValueVT) {
-      assert(NumParts == 1 && "No-op copy with multiple parts!");
-      Parts[0] = Val;
-      return;
-    }
-
-    if (NumParts * PartBits > ValueVT.getSizeInBits()) {
-      // If the parts cover more bits than the value has, promote the value.
-      if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
-        assert(NumParts == 1 && "Do not know what to promote to!");
-        Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
-      } else if (PartVT.isInteger() && ValueVT.isInteger()) {
-        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-        Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
-      } else {
-        llvm_unreachable("Unknown mismatch!");
-      }
-    } else if (PartBits == ValueVT.getSizeInBits()) {
-      // Different types of the same size.
-      assert(NumParts == 1 && PartVT != ValueVT);
-      Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
-    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
-      // If the parts cover less bits than value has, truncate the value.
-      if (PartVT.isInteger() && ValueVT.isInteger()) {
-        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-        Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-      } else {
-        llvm_unreachable("Unknown mismatch!");
-      }
-    }
-
-    // The value may have changed - recompute ValueVT.
-    ValueVT = Val.getValueType();
-    assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
-           "Failed to tile the value with PartVT!");
-
-    if (NumParts == 1) {
-      assert(PartVT == ValueVT && "Type conversion failed!");
-      Parts[0] = Val;
-      return;
-    }
-
-    // Expand the value into multiple parts.
-    if (NumParts & (NumParts - 1)) {
-      // The number of parts is not a power of 2.  Split off and copy the tail.
-      assert(PartVT.isInteger() && ValueVT.isInteger() &&
-             "Do not know what to expand to!");
-      unsigned RoundParts = 1 << Log2_32(NumParts);
-      unsigned RoundBits = RoundParts * PartBits;
-      unsigned OddParts = NumParts - RoundParts;
-      SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
-                                   DAG.getConstant(RoundBits,
-                                                   TLI.getPointerTy()));
-      getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT);
-      if (TLI.isBigEndian())
-        // The odd parts were reversed by getCopyToParts - unreverse them.
-        std::reverse(Parts + RoundParts, Parts + NumParts);
-      NumParts = RoundParts;
-      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
-      Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-    }
-
-    // The number of parts is a power of 2.  Repeatedly bisect the value using
-    // EXTRACT_ELEMENT.
-    Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()),
-                           Val);
-    for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
-      for (unsigned i = 0; i < NumParts; i += StepSize) {
-        unsigned ThisBits = StepSize * PartBits / 2;
-        EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
-        SDValue &Part0 = Parts[i];
-        SDValue &Part1 = Parts[i+StepSize/2];
-
-        Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
-                            ThisVT, Part0,
-                            DAG.getConstant(1, PtrVT));
-        Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
-                            ThisVT, Part0,
-                            DAG.getConstant(0, PtrVT));
-
-        if (ThisBits == PartBits && ThisVT != PartVT) {
-          Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
-                                                PartVT, Part0);
-          Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
-                                                PartVT, Part1);
-        }
-      }
-    }
-
-    if (TLI.isBigEndian())
-      std::reverse(Parts, Parts + OrigNumParts);
-
-    return;
-  }
-
-  // Vector ValueVT.
-  if (NumParts == 1) {
-    if (PartVT != ValueVT) {
-      if (PartVT.isVector()) {
-        Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
-      } else {
-        assert(ValueVT.getVectorElementType() == PartVT &&
-               ValueVT.getVectorNumElements() == 1 &&
-               "Only trivial vector-to-scalar conversions should get here!");
-        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                          PartVT, Val,
-                          DAG.getConstant(0, PtrVT));
-      }
-    }
-
-    Parts[0] = Val;
-    return;
-  }
-
-  // Handle a multi-element vector.
-  EVT IntermediateVT, RegisterVT;
-  unsigned NumIntermediates;
-  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
-                              IntermediateVT, NumIntermediates, RegisterVT);
-  unsigned NumElements = ValueVT.getVectorNumElements();
-
-  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
-  NumParts = NumRegs; // Silence a compiler warning.
-  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-
-  // Split the vector into intermediate operands.
-  SmallVector<SDValue, 8> Ops(NumIntermediates);
-  for (unsigned i = 0; i != NumIntermediates; ++i)
-    if (IntermediateVT.isVector())
-      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
-                           IntermediateVT, Val,
-                           DAG.getConstant(i * (NumElements / NumIntermediates),
-                                           PtrVT));
-    else
-      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                           IntermediateVT, Val,
-                           DAG.getConstant(i, PtrVT));
-
-  // Split the intermediate operands into legal parts.
-  if (NumParts == NumIntermediates) {
-    // If the register was not expanded, promote or copy the value,
-    // as appropriate.
-    for (unsigned i = 0; i != NumParts; ++i)
-      getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
-  } else if (NumParts > 0) {
-    // If the intermediate type was expanded, split each the value into
-    // legal parts.
-    assert(NumParts % NumIntermediates == 0 &&
-           "Must expand into a divisible number of parts!");
-    unsigned Factor = NumParts / NumIntermediates;
-    for (unsigned i = 0; i != NumIntermediates; ++i)
-      getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT);
-  }
-}
-
-
-void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
-  AA = &aa;
-  GFI = gfi;
-  TD = DAG.getTarget().getTargetData();
-}
-
-/// clear - Clear out the curret SelectionDAG and the associated
-/// state and prepare this SelectionDAGLowering object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
-void SelectionDAGLowering::clear() {
-  NodeMap.clear();
-  PendingLoads.clear();
-  PendingExports.clear();
-  EdgeMapping.clear();
-  DAG.clear();
-  CurDebugLoc = DebugLoc::getUnknownLoc();
-  HasTailCall = false;
-}
-
-/// getRoot - Return the current virtual root of the Selection DAG,
-/// flushing any PendingLoad items. This must be done before emitting
-/// a store or any other node that may need to be ordered after any
-/// prior load instructions.
-///
-SDValue SelectionDAGLowering::getRoot() {
-  if (PendingLoads.empty())
-    return DAG.getRoot();
-
-  if (PendingLoads.size() == 1) {
-    SDValue Root = PendingLoads[0];
-    DAG.setRoot(Root);
-    PendingLoads.clear();
-    return Root;
-  }
-
-  // Otherwise, we have to make a token factor node.
-  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
-                               &PendingLoads[0], PendingLoads.size());
-  PendingLoads.clear();
-  DAG.setRoot(Root);
-  return Root;
-}
-
-/// getControlRoot - Similar to getRoot, but instead of flushing all the
-/// PendingLoad items, flush all the PendingExports items. It is necessary
-/// to do this before emitting a terminator instruction.
-///
-SDValue SelectionDAGLowering::getControlRoot() {
-  SDValue Root = DAG.getRoot();
-
-  if (PendingExports.empty())
-    return Root;
-
-  // Turn all of the CopyToReg chains into one factored node.
-  if (Root.getOpcode() != ISD::EntryToken) {
-    unsigned i = 0, e = PendingExports.size();
-    for (; i != e; ++i) {
-      assert(PendingExports[i].getNode()->getNumOperands() > 1);
-      if (PendingExports[i].getNode()->getOperand(0) == Root)
-        break;  // Don't add the root if we already indirectly depend on it.
-    }
-
-    if (i == e)
-      PendingExports.push_back(Root);
-  }
-
-  Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
-                     &PendingExports[0],
-                     PendingExports.size());
-  PendingExports.clear();
-  DAG.setRoot(Root);
-  return Root;
-}
-
-void SelectionDAGLowering::visit(Instruction &I) {
-  visit(I.getOpcode(), I);
-}
-
-void SelectionDAGLowering::visit(unsigned Opcode, User &I) {
-  // Note: this doesn't use InstVisitor, because it has to work with
-  // ConstantExpr's in addition to instructions.
-  switch (Opcode) {
-  default: llvm_unreachable("Unknown instruction type encountered!");
-    // Build the switch statement using the Instruction.def file.
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
-  case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
-#include "llvm/Instruction.def"
-  }
-}
-
-SDValue SelectionDAGLowering::getValue(const Value *V) {
-  SDValue &N = NodeMap[V];
-  if (N.getNode()) return N;
-
-  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
-    EVT VT = TLI.getValueType(V->getType(), true);
-
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
-      return N = DAG.getConstant(*CI, VT);
-
-    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
-      return N = DAG.getGlobalAddress(GV, VT);
-
-    if (isa<ConstantPointerNull>(C))
-      return N = DAG.getConstant(0, TLI.getPointerTy());
-
-    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
-      return N = DAG.getConstantFP(*CFP, VT);
-
-    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
-      return N = DAG.getUNDEF(VT);
-
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
-      visit(CE->getOpcode(), *CE);
-      SDValue N1 = NodeMap[V];
-      assert(N1.getNode() && "visit didn't populate the ValueMap!");
-      return N1;
-    }
-
-    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
-      SmallVector<SDValue, 4> Constants;
-      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
-           OI != OE; ++OI) {
-        SDNode *Val = getValue(*OI).getNode();
-        // If the operand is an empty aggregate, there are no values.
-        if (!Val) continue;
-        // Add each leaf value from the operand to the Constants list
-        // to form a flattened list of all the values.
-        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
-          Constants.push_back(SDValue(Val, i));
-      }
-      return DAG.getMergeValues(&Constants[0], Constants.size(),
-                                getCurDebugLoc());
-    }
-
-    if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
-      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
-             "Unknown struct or array constant!");
-
-      SmallVector<EVT, 4> ValueVTs;
-      ComputeValueVTs(TLI, C->getType(), ValueVTs);
-      unsigned NumElts = ValueVTs.size();
-      if (NumElts == 0)
-        return SDValue(); // empty struct
-      SmallVector<SDValue, 4> Constants(NumElts);
-      for (unsigned i = 0; i != NumElts; ++i) {
-        EVT EltVT = ValueVTs[i];
-        if (isa<UndefValue>(C))
-          Constants[i] = DAG.getUNDEF(EltVT);
-        else if (EltVT.isFloatingPoint())
-          Constants[i] = DAG.getConstantFP(0, EltVT);
-        else
-          Constants[i] = DAG.getConstant(0, EltVT);
-      }
-      return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
-    }
-
-    if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
-      return DAG.getBlockAddress(BA, getCurDebugLoc());
-
-    const VectorType *VecTy = cast<VectorType>(V->getType());
-    unsigned NumElements = VecTy->getNumElements();
-
-    // Now that we know the number and type of the elements, get that number of
-    // elements into the Ops array based on what kind of constant it is.
-    SmallVector<SDValue, 16> Ops;
-    if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
-      for (unsigned i = 0; i != NumElements; ++i)
-        Ops.push_back(getValue(CP->getOperand(i)));
-    } else {
-      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
-      EVT EltVT = TLI.getValueType(VecTy->getElementType());
-
-      SDValue Op;
-      if (EltVT.isFloatingPoint())
-        Op = DAG.getConstantFP(0, EltVT);
-      else
-        Op = DAG.getConstant(0, EltVT);
-      Ops.assign(NumElements, Op);
-    }
-
-    // Create a BUILD_VECTOR node.
-    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
-                                    VT, &Ops[0], Ops.size());
-  }
-
-  // If this is a static alloca, generate it as the frameindex instead of
-  // computation.
-  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-    DenseMap<const AllocaInst*, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
-    if (SI != FuncInfo.StaticAllocaMap.end())
-      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
-  }
-
-  unsigned InReg = FuncInfo.ValueMap[V];
-  assert(InReg && "Value not in map!");
-
-  RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
-  SDValue Chain = DAG.getEntryNode();
-  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
-}
-
-/// Get the EVTs and ArgFlags collections that represent the return type
-/// of the given function.  This does not require a DAG or a return value, and
-/// is suitable for use before any DAGs for the function are constructed.
-static void getReturnInfo(const Type* ReturnType,
-                   Attributes attr, SmallVectorImpl<EVT> &OutVTs,
-                   SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
-                   TargetLowering &TLI,
-                   SmallVectorImpl<uint64_t> *Offsets = 0) {
-  SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
-  unsigned NumValues = ValueVTs.size();
-  if ( NumValues == 0 ) return;
-
-  for (unsigned j = 0, f = NumValues; j != f; ++j) {
-    EVT VT = ValueVTs[j];
-    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
-    if (attr & Attribute::SExt)
-      ExtendKind = ISD::SIGN_EXTEND;
-    else if (attr & Attribute::ZExt)
-      ExtendKind = ISD::ZERO_EXTEND;
-
-    // FIXME: C calling convention requires the return type to be promoted to
-    // at least 32-bit. But this is not necessary for non-C calling
-    // conventions. The frontend should mark functions whose return values
-    // require promoting with signext or zeroext attributes.
-    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
-      if (VT.bitsLT(MinVT))
-        VT = MinVT;
-    }
-
-    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
-    // 'inreg' on function refers to return value
-    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-    if (attr & Attribute::InReg)
-      Flags.setInReg();
-
-    // Propagate extension type if any
-    if (attr & Attribute::SExt)
-      Flags.setSExt();
-    else if (attr & Attribute::ZExt)
-      Flags.setZExt();
-
-    for (unsigned i = 0; i < NumParts; ++i) {
-      OutVTs.push_back(PartVT);
-      OutFlags.push_back(Flags);
-    }
-  }
-}
-
-void SelectionDAGLowering::visitRet(ReturnInst &I) {
-  SDValue Chain = getControlRoot();
-  SmallVector<ISD::OutputArg, 8> Outs;
-  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
-  
-  if (!FLI.CanLowerReturn) {
-    unsigned DemoteReg = FLI.DemoteRegister;
-    const Function *F = I.getParent()->getParent();
-
-    // Emit a store of the return value through the virtual register.
-    // Leave Outs empty so that LowerReturn won't try to load return
-    // registers the usual way.
-    SmallVector<EVT, 1> PtrValueVTs;
-    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), 
-                    PtrValueVTs);
-
-    SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
-    SDValue RetOp = getValue(I.getOperand(0));
-  
-    SmallVector<EVT, 4> ValueVTs;
-    SmallVector<uint64_t, 4> Offsets;
-    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
-    unsigned NumValues = ValueVTs.size();
-
-    SmallVector<SDValue, 4> Chains(NumValues);
-    EVT PtrVT = PtrValueVTs[0];
-    for (unsigned i = 0; i != NumValues; ++i)
-      Chains[i] = DAG.getStore(Chain, getCurDebugLoc(),
-                  SDValue(RetOp.getNode(), RetOp.getResNo() + i),
-                  DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
-                  DAG.getConstant(Offsets[i], PtrVT)),
-                  NULL, Offsets[i], false, 0);
-    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                        MVT::Other, &Chains[0], NumValues);
-  }
-  else {
-    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
-      SmallVector<EVT, 4> ValueVTs;
-      ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
-      unsigned NumValues = ValueVTs.size();
-      if (NumValues == 0) continue;
-  
-      SDValue RetOp = getValue(I.getOperand(i));
-      for (unsigned j = 0, f = NumValues; j != f; ++j) {
-        EVT VT = ValueVTs[j];
-
-        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
-        const Function *F = I.getParent()->getParent();
-        if (F->paramHasAttr(0, Attribute::SExt))
-          ExtendKind = ISD::SIGN_EXTEND;
-        else if (F->paramHasAttr(0, Attribute::ZExt))
-          ExtendKind = ISD::ZERO_EXTEND;
-
-        // FIXME: C calling convention requires the return type to be promoted to
-        // at least 32-bit. But this is not necessary for non-C calling
-        // conventions. The frontend should mark functions whose return values
-        // require promoting with signext or zeroext attributes.
-        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
-          if (VT.bitsLT(MinVT))
-            VT = MinVT;
-        }
-
-        unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
-        EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
-        SmallVector<SDValue, 4> Parts(NumParts);
-        getCopyToParts(DAG, getCurDebugLoc(),
-                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, ExtendKind);
-
-        // 'inreg' on function refers to return value
-        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-        if (F->paramHasAttr(0, Attribute::InReg))
-          Flags.setInReg();
-
-        // Propagate extension type if any
-        if (F->paramHasAttr(0, Attribute::SExt))
-          Flags.setSExt();
-        else if (F->paramHasAttr(0, Attribute::ZExt))
-          Flags.setZExt();
-
-        for (unsigned i = 0; i < NumParts; ++i)
-          Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
-      }
-    }
-  }
-
-  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
-  CallingConv::ID CallConv =
-    DAG.getMachineFunction().getFunction()->getCallingConv();
-  Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
-                          Outs, getCurDebugLoc(), DAG);
-
-  // Verify that the target's LowerReturn behaved as expected.
-  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
-         "LowerReturn didn't return a valid chain!");
-
-  // Update the DAG with the new chain value resulting from return lowering.
-  DAG.setRoot(Chain);
-}
-
-/// CopyToExportRegsIfNeeded - If the given value has virtual registers
-/// created for it, emit nodes to copy the value into the virtual
-/// registers.
-void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) {
-  if (!V->use_empty()) {
-    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
-    if (VMI != FuncInfo.ValueMap.end())
-      CopyValueToVirtualRegister(V, VMI->second);
-  }
-}
-
-/// ExportFromCurrentBlock - If this condition isn't known to be exported from
-/// the current basic block, add it to ValueMap now so that we'll get a
-/// CopyTo/FromReg.
-void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) {
-  // No need to export constants.
-  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
-
-  // Already exported?
-  if (FuncInfo.isExportedInst(V)) return;
-
-  unsigned Reg = FuncInfo.InitializeRegForValue(V);
-  CopyValueToVirtualRegister(V, Reg);
-}
-
-bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V,
-                                                    const BasicBlock *FromBB) {
-  // The operands of the setcc have to be in this block.  We don't know
-  // how to export them from some other block.
-  if (Instruction *VI = dyn_cast<Instruction>(V)) {
-    // Can export from current BB.
-    if (VI->getParent() == FromBB)
-      return true;
-
-    // Is already exported, noop.
-    return FuncInfo.isExportedInst(V);
-  }
-
-  // If this is an argument, we can export it if the BB is the entry block or
-  // if it is already exported.
-  if (isa<Argument>(V)) {
-    if (FromBB == &FromBB->getParent()->getEntryBlock())
-      return true;
-
-    // Otherwise, can only export this if it is already exported.
-    return FuncInfo.isExportedInst(V);
-  }
-
-  // Otherwise, constants can always be exported.
-  return true;
-}
-
-static bool InBlock(const Value *V, const BasicBlock *BB) {
-  if (const Instruction *I = dyn_cast<Instruction>(V))
-    return I->getParent() == BB;
-  return true;
-}
-
-/// getFCmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR floating-point condition code.  This includes
-/// consideration of global floating-point math flags.
-///
-static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
-  ISD::CondCode FPC, FOC;
-  switch (Pred) {
-  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
-  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
-  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
-  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
-  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
-  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
-  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
-  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
-  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
-  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
-  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
-  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
-  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
-  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
-  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
-  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
-  default:
-    llvm_unreachable("Invalid FCmp predicate opcode!");
-    FOC = FPC = ISD::SETFALSE;
-    break;
-  }
-  if (FiniteOnlyFPMath())
-    return FOC;
-  else
-    return FPC;
-}
-
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
-static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
-  switch (Pred) {
-  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
-  case ICmpInst::ICMP_NE:  return ISD::SETNE;
-  case ICmpInst::ICMP_SLE: return ISD::SETLE;
-  case ICmpInst::ICMP_ULE: return ISD::SETULE;
-  case ICmpInst::ICMP_SGE: return ISD::SETGE;
-  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
-  case ICmpInst::ICMP_SLT: return ISD::SETLT;
-  case ICmpInst::ICMP_ULT: return ISD::SETULT;
-  case ICmpInst::ICMP_SGT: return ISD::SETGT;
-  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
-  default:
-    llvm_unreachable("Invalid ICmp predicate opcode!");
-    return ISD::SETNE;
-  }
-}
-
-/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
-/// This function emits a branch and is used at the leaves of an OR or an
-/// AND operator tree.
-///
-void
-SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond,
-                                                   MachineBasicBlock *TBB,
-                                                   MachineBasicBlock *FBB,
-                                                   MachineBasicBlock *CurBB) {
-  const BasicBlock *BB = CurBB->getBasicBlock();
-
-  // If the leaf of the tree is a comparison, merge the condition into
-  // the caseblock.
-  if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
-    // The operands of the cmp have to be in this block.  We don't know
-    // how to export them from some other block.  If this is the first block
-    // of the sequence, no exporting is needed.
-    if (CurBB == CurMBB ||
-        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
-         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
-      ISD::CondCode Condition;
-      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
-        Condition = getICmpCondCode(IC->getPredicate());
-      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
-        Condition = getFCmpCondCode(FC->getPredicate());
-      } else {
-        Condition = ISD::SETEQ; // silence warning.
-        llvm_unreachable("Unknown compare instruction");
-      }
-
-      CaseBlock CB(Condition, BOp->getOperand(0),
-                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
-      SwitchCases.push_back(CB);
-      return;
-    }
-  }
-
-  // Create a CaseBlock record representing this branch.
-  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
-               NULL, TBB, FBB, CurBB);
-  SwitchCases.push_back(CB);
-}
-
-/// FindMergedConditions - If Cond is an expression like
-void SelectionDAGLowering::FindMergedConditions(Value *Cond,
-                                                MachineBasicBlock *TBB,
-                                                MachineBasicBlock *FBB,
-                                                MachineBasicBlock *CurBB,
-                                                unsigned Opc) {
-  // If this node is not part of the or/and tree, emit it as a branch.
-  Instruction *BOp = dyn_cast<Instruction>(Cond);
-  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
-      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
-      BOp->getParent() != CurBB->getBasicBlock() ||
-      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
-      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
-    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
-    return;
-  }
-
-  //  Create TmpBB after CurBB.
-  MachineFunction::iterator BBI = CurBB;
-  MachineFunction &MF = DAG.getMachineFunction();
-  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
-  CurBB->getParent()->insert(++BBI, TmpBB);
-
-  if (Opc == Instruction::Or) {
-    // Codegen X | Y as:
-    //   jmp_if_X TBB
-    //   jmp TmpBB
-    // TmpBB:
-    //   jmp_if_Y TBB
-    //   jmp FBB
-    //
-
-    // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
-
-    // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
-  } else {
-    assert(Opc == Instruction::And && "Unknown merge op!");
-    // Codegen X & Y as:
-    //   jmp_if_X TmpBB
-    //   jmp FBB
-    // TmpBB:
-    //   jmp_if_Y TBB
-    //   jmp FBB
-    //
-    //  This requires creation of TmpBB after CurBB.
-
-    // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
-
-    // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
-  }
-}
-
-/// If the set of cases should be emitted as a series of branches, return true.
-/// If we should emit this as a bunch of and/or'd together conditions, return
-/// false.
-bool
-SelectionDAGLowering::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
-  if (Cases.size() != 2) return true;
-
-  // If this is two comparisons of the same values or'd or and'd together, they
-  // will get folded into a single comparison, so don't emit two blocks.
-  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
-       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
-      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
-       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
-    return false;
-  }
-
-  return true;
-}
-
-void SelectionDAGLowering::visitBr(BranchInst &I) {
-  // Update machine-CFG edges.
-  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
-
-  // Figure out which block is immediately after the current one.
-  MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
-  if (++BBI != FuncInfo.MF->end())
-    NextBlock = BBI;
-
-  if (I.isUnconditional()) {
-    // Update machine-CFG edges.
-    CurMBB->addSuccessor(Succ0MBB);
-
-    // If this is not a fall-through branch, emit the branch.
-    if (Succ0MBB != NextBlock)
-      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
-                              MVT::Other, getControlRoot(),
-                              DAG.getBasicBlock(Succ0MBB)));
-    return;
-  }
-
-  // If this condition is one of the special cases we handle, do special stuff
-  // now.
-  Value *CondVal = I.getCondition();
-  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
-
-  // If this is a series of conditions that are or'd or and'd together, emit
-  // this as a sequence of branches instead of setcc's with and/or operations.
-  // For example, instead of something like:
-  //     cmp A, B
-  //     C = seteq
-  //     cmp D, E
-  //     F = setle
-  //     or C, F
-  //     jnz foo
-  // Emit:
-  //     cmp A, B
-  //     je foo
-  //     cmp D, E
-  //     jle foo
-  //
-  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
-    if (BOp->hasOneUse() &&
-        (BOp->getOpcode() == Instruction::And ||
-         BOp->getOpcode() == Instruction::Or)) {
-      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
-      // If the compares in later blocks need to use values not currently
-      // exported from this block, export them now.  This block should always
-      // be the first entry.
-      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
-
-      // Allow some cases to be rejected.
-      if (ShouldEmitAsBranches(SwitchCases)) {
-        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
-          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
-          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
-        }
-
-        // Emit the branch for this block.
-        visitSwitchCase(SwitchCases[0]);
-        SwitchCases.erase(SwitchCases.begin());
-        return;
-      }
-
-      // Okay, we decided not to do this, remove any inserted MBB's and clear
-      // SwitchCases.
-      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
-        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
-
-      SwitchCases.clear();
-    }
-  }
-
-  // Create a CaseBlock record representing this branch.
-  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
-               NULL, Succ0MBB, Succ1MBB, CurMBB);
-  // Use visitSwitchCase to actually insert the fast branch sequence for this
-  // cond branch.
-  visitSwitchCase(CB);
-}
-
-/// visitSwitchCase - Emits the necessary code to represent a single node in
-/// the binary search tree resulting from lowering a switch instruction.
-void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) {
-  SDValue Cond;
-  SDValue CondLHS = getValue(CB.CmpLHS);
-  DebugLoc dl = getCurDebugLoc();
-
-  // Build the setcc now.
-  if (CB.CmpMHS == NULL) {
-    // Fold "(X == true)" to X and "(X == false)" to !X to
-    // handle common cases produced by branch lowering.
-    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
-        CB.CC == ISD::SETEQ)
-      Cond = CondLHS;
-    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
-             CB.CC == ISD::SETEQ) {
-      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
-      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
-    } else
-      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
-  } else {
-    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
-
-    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
-    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
-
-    SDValue CmpOp = getValue(CB.CmpMHS);
-    EVT VT = CmpOp.getValueType();
-
-    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
-      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
-                          ISD::SETLE);
-    } else {
-      SDValue SUB = DAG.getNode(ISD::SUB, dl,
-                                VT, CmpOp, DAG.getConstant(Low, VT));
-      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
-                          DAG.getConstant(High-Low, VT), ISD::SETULE);
-    }
-  }
-
-  // Update successor info
-  CurMBB->addSuccessor(CB.TrueBB);
-  CurMBB->addSuccessor(CB.FalseBB);
-
-  // Set NextBlock to be the MBB immediately after the current one, if any.
-  // This is used to avoid emitting unnecessary branches to the next block.
-  MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
-  if (++BBI != FuncInfo.MF->end())
-    NextBlock = BBI;
-
-  // If the lhs block is the next block, invert the condition so that we can
-  // fall through to the lhs instead of the rhs block.
-  if (CB.TrueBB == NextBlock) {
-    std::swap(CB.TrueBB, CB.FalseBB);
-    SDValue True = DAG.getConstant(1, Cond.getValueType());
-    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
-  }
-  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
-                               MVT::Other, getControlRoot(), Cond,
-                               DAG.getBasicBlock(CB.TrueBB));
-
-  // If the branch was constant folded, fix up the CFG.
-  if (BrCond.getOpcode() == ISD::BR) {
-    CurMBB->removeSuccessor(CB.FalseBB);
-    DAG.setRoot(BrCond);
-  } else {
-    // Otherwise, go ahead and insert the false branch.
-    if (BrCond == getControlRoot())
-      CurMBB->removeSuccessor(CB.TrueBB);
-
-    if (CB.FalseBB == NextBlock)
-      DAG.setRoot(BrCond);
-    else
-      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
-                              DAG.getBasicBlock(CB.FalseBB)));
-  }
-}
-
-/// visitJumpTable - Emit JumpTable node in the current MBB
-void SelectionDAGLowering::visitJumpTable(JumpTable &JT) {
-  // Emit the code for the jump table
-  assert(JT.Reg != -1U && "Should lower JT Header first!");
-  EVT PTy = TLI.getPointerTy();
-  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
-                                     JT.Reg, PTy);
-  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
-  DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
-                          MVT::Other, Index.getValue(1),
-                          Table, Index));
-}
-
-/// visitJumpTableHeader - This function emits necessary code to produce index
-/// in the JumpTable from switch case.
-void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT,
-                                                JumpTableHeader &JTH) {
-  // Subtract the lowest switch case value from the value being switched on and
-  // conditional branch to default mbb if the result is greater than the
-  // difference between smallest and largest cases.
-  SDValue SwitchOp = getValue(JTH.SValue);
-  EVT VT = SwitchOp.getValueType();
-  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
-                            DAG.getConstant(JTH.First, VT));
-
-  // The SDNode we just created, which holds the value being switched on minus
-  // the the smallest case value, needs to be copied to a virtual register so it
-  // can be used as an index into the jump table in a subsequent basic block.
-  // This value may be smaller or larger than the target's pointer type, and
-  // therefore require extension or truncating.
-  SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
-
-  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
-  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
-                                    JumpTableReg, SwitchOp);
-  JT.Reg = JumpTableReg;
-
-  // Emit the range check for the jump table, and branch to the default block
-  // for the switch statement if the value being switched on exceeds the largest
-  // case in the switch.
-  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
-                             TLI.getSetCCResultType(SUB.getValueType()), SUB,
-                             DAG.getConstant(JTH.Last-JTH.First,VT),
-                             ISD::SETUGT);
-
-  // Set NextBlock to be the MBB immediately after the current one, if any.
-  // This is used to avoid emitting unnecessary branches to the next block.
-  MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
-  if (++BBI != FuncInfo.MF->end())
-    NextBlock = BBI;
-
-  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
-                               MVT::Other, CopyTo, CMP,
-                               DAG.getBasicBlock(JT.Default));
-
-  if (JT.MBB == NextBlock)
-    DAG.setRoot(BrCond);
-  else
-    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
-                            DAG.getBasicBlock(JT.MBB)));
-}
-
-/// visitBitTestHeader - This function emits necessary code to produce value
-/// suitable for "bit tests"
-void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) {
-  // Subtract the minimum value
-  SDValue SwitchOp = getValue(B.SValue);
-  EVT VT = SwitchOp.getValueType();
-  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
-                            DAG.getConstant(B.First, VT));
-
-  // Check range
-  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
-                                  TLI.getSetCCResultType(SUB.getValueType()),
-                                  SUB, DAG.getConstant(B.Range, VT),
-                                  ISD::SETUGT);
-
-  SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
-
-  B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
-  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
-                                    B.Reg, ShiftOp);
-
-  // Set NextBlock to be the MBB immediately after the current one, if any.
-  // This is used to avoid emitting unnecessary branches to the next block.
-  MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
-  if (++BBI != FuncInfo.MF->end())
-    NextBlock = BBI;
-
-  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
-
-  CurMBB->addSuccessor(B.Default);
-  CurMBB->addSuccessor(MBB);
-
-  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
-                                MVT::Other, CopyTo, RangeCmp,
-                                DAG.getBasicBlock(B.Default));
-
-  if (MBB == NextBlock)
-    DAG.setRoot(BrRange);
-  else
-    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
-                            DAG.getBasicBlock(MBB)));
-}
-
-/// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB,
-                                            unsigned Reg,
-                                            BitTestCase &B) {
-  // Make desired shift
-  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
-                                       TLI.getPointerTy());
-  SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
-                                  TLI.getPointerTy(),
-                                  DAG.getConstant(1, TLI.getPointerTy()),
-                                  ShiftOp);
-
-  // Emit bit tests and jumps
-  SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
-                              TLI.getPointerTy(), SwitchVal,
-                              DAG.getConstant(B.Mask, TLI.getPointerTy()));
-  SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
-                                TLI.getSetCCResultType(AndOp.getValueType()),
-                                AndOp, DAG.getConstant(0, TLI.getPointerTy()),
-                                ISD::SETNE);
-
-  CurMBB->addSuccessor(B.TargetBB);
-  CurMBB->addSuccessor(NextMBB);
-
-  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
-                              MVT::Other, getControlRoot(),
-                              AndCmp, DAG.getBasicBlock(B.TargetBB));
-
-  // Set NextBlock to be the MBB immediately after the current one, if any.
-  // This is used to avoid emitting unnecessary branches to the next block.
-  MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CurMBB;
-  if (++BBI != FuncInfo.MF->end())
-    NextBlock = BBI;
-
-  if (NextMBB == NextBlock)
-    DAG.setRoot(BrAnd);
-  else
-    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
-                            DAG.getBasicBlock(NextMBB)));
-}
-
-void SelectionDAGLowering::visitInvoke(InvokeInst &I) {
-  // Retrieve successors.
-  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
-  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
-
-  const Value *Callee(I.getCalledValue());
-  if (isa<InlineAsm>(Callee))
-    visitInlineAsm(&I);
-  else
-    LowerCallTo(&I, getValue(Callee), false, LandingPad);
-
-  // If the value of the invoke is used outside of its defining block, make it
-  // available as a virtual register.
-  CopyToExportRegsIfNeeded(&I);
-
-  // Update successor info
-  CurMBB->addSuccessor(Return);
-  CurMBB->addSuccessor(LandingPad);
-
-  // Drop into normal successor.
-  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
-                          MVT::Other, getControlRoot(),
-                          DAG.getBasicBlock(Return)));
-}
-
-void SelectionDAGLowering::visitUnwind(UnwindInst &I) {
-}
-
-/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
-/// small case ranges).
-bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR,
-                                                  CaseRecVector& WorkList,
-                                                  Value* SV,
-                                                  MachineBasicBlock* Default) {
-  Case& BackCase  = *(CR.Range.second-1);
-
-  // Size is the number of Cases represented by this range.
-  size_t Size = CR.Range.second - CR.Range.first;
-  if (Size > 3)
-    return false;
-
-  // Get the MachineFunction which holds the current MBB.  This is used when
-  // inserting any additional MBBs necessary to represent the switch.
-  MachineFunction *CurMF = FuncInfo.MF;
-
-  // Figure out which block is immediately after the current one.
-  MachineBasicBlock *NextBlock = 0;
-  MachineFunction::iterator BBI = CR.CaseBB;
-
-  if (++BBI != FuncInfo.MF->end())
-    NextBlock = BBI;
-
-  // TODO: If any two of the cases has the same destination, and if one value
-  // is the same as the other, but has one bit unset that the other has set,
-  // use bit manipulation to do two compares at once.  For example:
-  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
-
-  // Rearrange the case blocks so that the last one falls through if possible.
-  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
-    // The last case block won't fall through into 'NextBlock' if we emit the
-    // branches in this order.  See if rearranging a case value would help.
-    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
-      if (I->BB == NextBlock) {
-        std::swap(*I, BackCase);
-        break;
-      }
-    }
-  }
-
-  // Create a CaseBlock record representing a conditional branch to
-  // the Case's target mbb if the value being switched on SV is equal
-  // to C.
-  MachineBasicBlock *CurBlock = CR.CaseBB;
-  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
-    MachineBasicBlock *FallThrough;
-    if (I != E-1) {
-      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
-      CurMF->insert(BBI, FallThrough);
-
-      // Put SV in a virtual register to make it available from the new blocks.
-      ExportFromCurrentBlock(SV);
-    } else {
-      // If the last case doesn't match, go to the default block.
-      FallThrough = Default;
-    }
-
-    Value *RHS, *LHS, *MHS;
-    ISD::CondCode CC;
-    if (I->High == I->Low) {
-      // This is just small small case range :) containing exactly 1 case
-      CC = ISD::SETEQ;
-      LHS = SV; RHS = I->High; MHS = NULL;
-    } else {
-      CC = ISD::SETLE;
-      LHS = I->Low; MHS = SV; RHS = I->High;
-    }
-    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
-
-    // If emitting the first comparison, just call visitSwitchCase to emit the
-    // code into the current block.  Otherwise, push the CaseBlock onto the
-    // vector to be later processed by SDISel, and insert the node's MBB
-    // before the next MBB.
-    if (CurBlock == CurMBB)
-      visitSwitchCase(CB);
-    else
-      SwitchCases.push_back(CB);
-
-    CurBlock = FallThrough;
-  }
-
-  return true;
-}
-
-static inline bool areJTsAllowed(const TargetLowering &TLI) {
-  return !DisableJumpTables &&
-          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
-           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
-}
-
-static APInt ComputeRange(const APInt &First, const APInt &Last) {
-  APInt LastExt(Last), FirstExt(First);
-  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
-  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
-  return (LastExt - FirstExt + 1ULL);
-}
-
-/// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
-                                              CaseRecVector& WorkList,
-                                              Value* SV,
-                                              MachineBasicBlock* Default) {
-  Case& FrontCase = *CR.Range.first;
-  Case& BackCase  = *(CR.Range.second-1);
-
-  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
-  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
-
-  APInt TSize(First.getBitWidth(), 0);
-  for (CaseItr I = CR.Range.first, E = CR.Range.second;
-       I!=E; ++I)
-    TSize += I->size();
-
-  if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
-    return false;
-
-  APInt Range = ComputeRange(First, Last);
-  double Density = TSize.roundToDouble() / Range.roundToDouble();
-  if (Density < 0.4)
-    return false;
-
-  DEBUG(errs() << "Lowering jump table\n"
-               << "First entry: " << First << ". Last entry: " << Last << '\n'
-               << "Range: " << Range
-               << "Size: " << TSize << ". Density: " << Density << "\n\n");
-
-  // Get the MachineFunction which holds the current MBB.  This is used when
-  // inserting any additional MBBs necessary to represent the switch.
-  MachineFunction *CurMF = FuncInfo.MF;
-
-  // Figure out which block is immediately after the current one.
-  MachineFunction::iterator BBI = CR.CaseBB;
-  ++BBI;
-
-  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
-
-  // Create a new basic block to hold the code for loading the address
-  // of the jump table, and jumping to it.  Update successor information;
-  // we will either branch to the default case for the switch, or the jump
-  // table.
-  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
-  CurMF->insert(BBI, JumpTableBB);
-  CR.CaseBB->addSuccessor(Default);
-  CR.CaseBB->addSuccessor(JumpTableBB);
-
-  // Build a vector of destination BBs, corresponding to each target
-  // of the jump table. If the value of the jump table slot corresponds to
-  // a case statement, push the case's BB onto the vector, otherwise, push
-  // the default BB.
-  std::vector<MachineBasicBlock*> DestBBs;
-  APInt TEI = First;
-  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
-    const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
-    const APInt& High = cast<ConstantInt>(I->High)->getValue();
-
-    if (Low.sle(TEI) && TEI.sle(High)) {
-      DestBBs.push_back(I->BB);
-      if (TEI==High)
-        ++I;
-    } else {
-      DestBBs.push_back(Default);
-    }
-  }
-
-  // Update successor info. Add one edge to each unique successor.
-  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
-  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
-         E = DestBBs.end(); I != E; ++I) {
-    if (!SuccsHandled[(*I)->getNumber()]) {
-      SuccsHandled[(*I)->getNumber()] = true;
-      JumpTableBB->addSuccessor(*I);
-    }
-  }
-
-  // Create a jump table index for this jump table, or return an existing
-  // one.
-  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
-
-  // Set the jump table information so that we can codegen it as a second
-  // MachineBasicBlock
-  JumpTable JT(-1U, JTI, JumpTableBB, Default);
-  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
-  if (CR.CaseBB == CurMBB)
-    visitJumpTableHeader(JT, JTH);
-
-  JTCases.push_back(JumpTableBlock(JTH, JT));
-
-  return true;
-}
-
-/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
-/// 2 subtrees.
-bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
-                                                   CaseRecVector& WorkList,
-                                                   Value* SV,
-                                                   MachineBasicBlock* Default) {
-  // Get the MachineFunction which holds the current MBB.  This is used when
-  // inserting any additional MBBs necessary to represent the switch.
-  MachineFunction *CurMF = FuncInfo.MF;
-
-  // Figure out which block is immediately after the current one.
-  MachineFunction::iterator BBI = CR.CaseBB;
-  ++BBI;
-
-  Case& FrontCase = *CR.Range.first;
-  Case& BackCase  = *(CR.Range.second-1);
-  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
-
-  // Size is the number of Cases represented by this range.
-  unsigned Size = CR.Range.second - CR.Range.first;
-
-  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
-  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
-  double FMetric = 0;
-  CaseItr Pivot = CR.Range.first + Size/2;
-
-  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
-  // (heuristically) allow us to emit JumpTable's later.
-  APInt TSize(First.getBitWidth(), 0);
-  for (CaseItr I = CR.Range.first, E = CR.Range.second;
-       I!=E; ++I)
-    TSize += I->size();
-
-  APInt LSize = FrontCase.size();
-  APInt RSize = TSize-LSize;
-  DEBUG(errs() << "Selecting best pivot: \n"
-               << "First: " << First << ", Last: " << Last <<'\n'
-               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
-  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
-       J!=E; ++I, ++J) {
-    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
-    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
-    APInt Range = ComputeRange(LEnd, RBegin);
-    assert((Range - 2ULL).isNonNegative() &&
-           "Invalid case distance");
-    double LDensity = (double)LSize.roundToDouble() / 
-                           (LEnd - First + 1ULL).roundToDouble();
-    double RDensity = (double)RSize.roundToDouble() /
-                           (Last - RBegin + 1ULL).roundToDouble();
-    double Metric = Range.logBase2()*(LDensity+RDensity);
-    // Should always split in some non-trivial place
-    DEBUG(errs() <<"=>Step\n"
-                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
-                 << "LDensity: " << LDensity
-                 << ", RDensity: " << RDensity << '\n'
-                 << "Metric: " << Metric << '\n');
-    if (FMetric < Metric) {
-      Pivot = J;
-      FMetric = Metric;
-      DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
-    }
-
-    LSize += J->size();
-    RSize -= J->size();
-  }
-  if (areJTsAllowed(TLI)) {
-    // If our case is dense we *really* should handle it earlier!
-    assert((FMetric > 0) && "Should handle dense range earlier!");
-  } else {
-    Pivot = CR.Range.first + Size/2;
-  }
-
-  CaseRange LHSR(CR.Range.first, Pivot);
-  CaseRange RHSR(Pivot, CR.Range.second);
-  Constant *C = Pivot->Low;
-  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
-
-  // We know that we branch to the LHS if the Value being switched on is
-  // less than the Pivot value, C.  We use this to optimize our binary
-  // tree a bit, by recognizing that if SV is greater than or equal to the
-  // LHS's Case Value, and that Case Value is exactly one less than the
-  // Pivot's Value, then we can branch directly to the LHS's Target,
-  // rather than creating a leaf node for it.
-  if ((LHSR.second - LHSR.first) == 1 &&
-      LHSR.first->High == CR.GE &&
-      cast<ConstantInt>(C)->getValue() ==
-      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
-    TrueBB = LHSR.first->BB;
-  } else {
-    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
-    CurMF->insert(BBI, TrueBB);
-    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
-
-    // Put SV in a virtual register to make it available from the new blocks.
-    ExportFromCurrentBlock(SV);
-  }
-
-  // Similar to the optimization above, if the Value being switched on is
-  // known to be less than the Constant CR.LT, and the current Case Value
-  // is CR.LT - 1, then we can branch directly to the target block for
-  // the current Case Value, rather than emitting a RHS leaf node for it.
-  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
-      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
-      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
-    FalseBB = RHSR.first->BB;
-  } else {
-    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
-    CurMF->insert(BBI, FalseBB);
-    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
-
-    // Put SV in a virtual register to make it available from the new blocks.
-    ExportFromCurrentBlock(SV);
-  }
-
-  // Create a CaseBlock record representing a conditional branch to
-  // the LHS node if the value being switched on SV is less than C.
-  // Otherwise, branch to LHS.
-  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
-
-  if (CR.CaseBB == CurMBB)
-    visitSwitchCase(CB);
-  else
-    SwitchCases.push_back(CB);
-
-  return true;
-}
-
-/// handleBitTestsSwitchCase - if current case range has few destination and
-/// range span less, than machine word bitwidth, encode case range into series
-/// of masks and emit bit tests with these masks.
-bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR,
-                                                    CaseRecVector& WorkList,
-                                                    Value* SV,
-                                                    MachineBasicBlock* Default){
-  EVT PTy = TLI.getPointerTy();
-  unsigned IntPtrBits = PTy.getSizeInBits();
-
-  Case& FrontCase = *CR.Range.first;
-  Case& BackCase  = *(CR.Range.second-1);
-
-  // Get the MachineFunction which holds the current MBB.  This is used when
-  // inserting any additional MBBs necessary to represent the switch.
-  MachineFunction *CurMF = FuncInfo.MF;
-
-  // If target does not have legal shift left, do not emit bit tests at all.
-  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
-    return false;
-
-  size_t numCmps = 0;
-  for (CaseItr I = CR.Range.first, E = CR.Range.second;
-       I!=E; ++I) {
-    // Single case counts one, case range - two.
-    numCmps += (I->Low == I->High ? 1 : 2);
-  }
-
-  // Count unique destinations
-  SmallSet<MachineBasicBlock*, 4> Dests;
-  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
-    Dests.insert(I->BB);
-    if (Dests.size() > 3)
-      // Don't bother the code below, if there are too much unique destinations
-      return false;
-  }
-  DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
-               << "Total number of comparisons: " << numCmps << '\n');
-
-  // Compute span of values.
-  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
-  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
-  APInt cmpRange = maxValue - minValue;
-
-  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
-               << "Low bound: " << minValue << '\n'
-               << "High bound: " << maxValue << '\n');
-
-  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
-      (!(Dests.size() == 1 && numCmps >= 3) &&
-       !(Dests.size() == 2 && numCmps >= 5) &&
-       !(Dests.size() >= 3 && numCmps >= 6)))
-    return false;
-
-  DEBUG(errs() << "Emitting bit tests\n");
-  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
-
-  // Optimize the case where all the case values fit in a
-  // word without having to subtract minValue. In this case,
-  // we can optimize away the subtraction.
-  if (minValue.isNonNegative() &&
-      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
-    cmpRange = maxValue;
-  } else {
-    lowBound = minValue;
-  }
-
-  CaseBitsVector CasesBits;
-  unsigned i, count = 0;
-
-  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
-    MachineBasicBlock* Dest = I->BB;
-    for (i = 0; i < count; ++i)
-      if (Dest == CasesBits[i].BB)
-        break;
-
-    if (i == count) {
-      assert((count < 3) && "Too much destinations to test!");
-      CasesBits.push_back(CaseBits(0, Dest, 0));
-      count++;
-    }
-
-    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
-    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
-
-    uint64_t lo = (lowValue - lowBound).getZExtValue();
-    uint64_t hi = (highValue - lowBound).getZExtValue();
-
-    for (uint64_t j = lo; j <= hi; j++) {
-      CasesBits[i].Mask |=  1ULL << j;
-      CasesBits[i].Bits++;
-    }
-
-  }
-  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
-
-  BitTestInfo BTC;
-
-  // Figure out which block is immediately after the current one.
-  MachineFunction::iterator BBI = CR.CaseBB;
-  ++BBI;
-
-  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
-
-  DEBUG(errs() << "Cases:\n");
-  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
-    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
-                 << ", Bits: " << CasesBits[i].Bits
-                 << ", BB: " << CasesBits[i].BB << '\n');
-
-    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
-    CurMF->insert(BBI, CaseBB);
-    BTC.push_back(BitTestCase(CasesBits[i].Mask,
-                              CaseBB,
-                              CasesBits[i].BB));
-
-    // Put SV in a virtual register to make it available from the new blocks.
-    ExportFromCurrentBlock(SV);
-  }
-
-  BitTestBlock BTB(lowBound, cmpRange, SV,
-                   -1U, (CR.CaseBB == CurMBB),
-                   CR.CaseBB, Default, BTC);
-
-  if (CR.CaseBB == CurMBB)
-    visitBitTestHeader(BTB);
-
-  BitTestCases.push_back(BTB);
-
-  return true;
-}
-
-
-/// Clusterify - Transform simple list of Cases into list of CaseRange's
-size_t SelectionDAGLowering::Clusterify(CaseVector& Cases,
-                                          const SwitchInst& SI) {
-  size_t numCmps = 0;
-
-  // Start with "simple" cases
-  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
-    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
-    Cases.push_back(Case(SI.getSuccessorValue(i),
-                         SI.getSuccessorValue(i),
-                         SMBB));
-  }
-  std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
-  // Merge case into clusters
-  if (Cases.size() >= 2)
-    // Must recompute end() each iteration because it may be
-    // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
-      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
-      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
-      MachineBasicBlock* nextBB = J->BB;
-      MachineBasicBlock* currentBB = I->BB;
-
-      // If the two neighboring cases go to the same destination, merge them
-      // into a single case.
-      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
-        I->High = J->High;
-        J = Cases.erase(J);
-      } else {
-        I = J++;
-      }
-    }
-
-  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
-    if (I->Low != I->High)
-      // A range counts double, since it requires two compares.
-      ++numCmps;
-  }
-
-  return numCmps;
-}
-
-void SelectionDAGLowering::visitSwitch(SwitchInst &SI) {
-  // Figure out which block is immediately after the current one.
-  MachineBasicBlock *NextBlock = 0;
-
-  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
-
-  // If there is only the default destination, branch to it if it is not the
-  // next basic block.  Otherwise, just fall through.
-  if (SI.getNumOperands() == 2) {
-    // Update machine-CFG edges.
-
-    // If this is not a fall-through branch, emit the branch.
-    CurMBB->addSuccessor(Default);
-    if (Default != NextBlock)
-      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
-                              MVT::Other, getControlRoot(),
-                              DAG.getBasicBlock(Default)));
-    return;
-  }
-
-  // If there are any non-default case statements, create a vector of Cases
-  // representing each one, and sort the vector so that we can efficiently
-  // create a binary search tree from them.
-  CaseVector Cases;
-  size_t numCmps = Clusterify(Cases, SI);
-  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
-               << ". Total compares: " << numCmps << '\n');
-  numCmps = 0;
-
-  // Get the Value to be switched on and default basic blocks, which will be
-  // inserted into CaseBlock records, representing basic blocks in the binary
-  // search tree.
-  Value *SV = SI.getOperand(0);
-
-  // Push the initial CaseRec onto the worklist
-  CaseRecVector WorkList;
-  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
-
-  while (!WorkList.empty()) {
-    // Grab a record representing a case range to process off the worklist
-    CaseRec CR = WorkList.back();
-    WorkList.pop_back();
-
-    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
-      continue;
-
-    // If the range has few cases (two or less) emit a series of specific
-    // tests.
-    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
-      continue;
-
-    // If the switch has more than 5 blocks, and at least 40% dense, and the
-    // target supports indirect branches, then emit a jump table rather than
-    // lowering the switch to a binary tree of conditional branches.
-    if (handleJTSwitchCase(CR, WorkList, SV, Default))
-      continue;
-
-    // Emit binary tree. We need to pick a pivot, and push left and right ranges
-    // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
-    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
-  }
-}
-
-void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) {
-  // Update machine-CFG edges.
-  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
-    CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
-
-  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
-                          MVT::Other, getControlRoot(),
-                          getValue(I.getAddress())));
-}
-
-
-void SelectionDAGLowering::visitFSub(User &I) {
-  // -0.0 - X --> fneg
-  const Type *Ty = I.getType();
-  if (isa<VectorType>(Ty)) {
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
-      const VectorType *DestTy = cast<VectorType>(I.getType());
-      const Type *ElTy = DestTy->getElementType();
-      unsigned VL = DestTy->getNumElements();
-      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
-      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
-      if (CV == CNZ) {
-        SDValue Op2 = getValue(I.getOperand(1));
-        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                                 Op2.getValueType(), Op2));
-        return;
-      }
-    }
-  }
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
-    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
-      SDValue Op2 = getValue(I.getOperand(1));
-      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                               Op2.getValueType(), Op2));
-      return;
-    }
-
-  visitBinary(I, ISD::FSUB);
-}
-
-void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
-  SDValue Op1 = getValue(I.getOperand(0));
-  SDValue Op2 = getValue(I.getOperand(1));
-
-  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
-                           Op1.getValueType(), Op1, Op2));
-}
-
-void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
-  SDValue Op1 = getValue(I.getOperand(0));
-  SDValue Op2 = getValue(I.getOperand(1));
-  if (!isa<VectorType>(I.getType()) &&
-      Op2.getValueType() != TLI.getShiftAmountTy()) {
-    // If the operand is smaller than the shift count type, promote it.
-    EVT PTy = TLI.getPointerTy();
-    EVT STy = TLI.getShiftAmountTy();
-    if (STy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
-    // If the operand is larger than the shift count type but the shift
-    // count type has enough bits to represent any shift value, truncate
-    // it now. This is a common case and it exposes the truncate to
-    // optimization early.
-    else if (STy.getSizeInBits() >=
-             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
-    // Otherwise we'll need to temporarily settle for some other
-    // convenient type; type legalization will make adjustments as
-    // needed.
-    else if (PTy.bitsLT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
-    else if (PTy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
-  }
-
-  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
-                           Op1.getValueType(), Op1, Op2));
-}
-
-void SelectionDAGLowering::visitICmp(User &I) {
-  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
-  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
-    predicate = IC->getPredicate();
-  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
-    predicate = ICmpInst::Predicate(IC->getPredicate());
-  SDValue Op1 = getValue(I.getOperand(0));
-  SDValue Op2 = getValue(I.getOperand(1));
-  ISD::CondCode Opcode = getICmpCondCode(predicate);
-  
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
-}
-
-void SelectionDAGLowering::visitFCmp(User &I) {
-  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
-  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
-    predicate = FC->getPredicate();
-  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
-    predicate = FCmpInst::Predicate(FC->getPredicate());
-  SDValue Op1 = getValue(I.getOperand(0));
-  SDValue Op2 = getValue(I.getOperand(1));
-  ISD::CondCode Condition = getFCmpCondCode(predicate);
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
-}
-
-void SelectionDAGLowering::visitSelect(User &I) {
-  SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, I.getType(), ValueVTs);
-  unsigned NumValues = ValueVTs.size();
-  if (NumValues != 0) {
-    SmallVector<SDValue, 4> Values(NumValues);
-    SDValue Cond     = getValue(I.getOperand(0));
-    SDValue TrueVal  = getValue(I.getOperand(1));
-    SDValue FalseVal = getValue(I.getOperand(2));
-
-    for (unsigned i = 0; i != NumValues; ++i)
-      Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                              TrueVal.getValueType(), Cond,
-                              SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
-                              SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
-
-    setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
-                             DAG.getVTList(&ValueVTs[0], NumValues),
-                             &Values[0], NumValues));
-  }
-}
-
-
-void SelectionDAGLowering::visitTrunc(User &I) {
-  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitZExt(User &I) {
-  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
-  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitSExt(User &I) {
-  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
-  // SExt also can't be a cast to bool for same reason. So, nothing much to do
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitFPTrunc(User &I) {
-  // FPTrunc is never a no-op cast, no need to check
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
-                           DestVT, N, DAG.getIntPtrConstant(0)));
-}
-
-void SelectionDAGLowering::visitFPExt(User &I){
-  // FPTrunc is never a no-op cast, no need to check
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitFPToUI(User &I) {
-  // FPToUI is never a no-op cast, no need to check
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitFPToSI(User &I) {
-  // FPToSI is never a no-op cast, no need to check
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitUIToFP(User &I) {
-  // UIToFP is never a no-op cast, no need to check
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitSIToFP(User &I){
-  // SIToFP is never a no-op cast, no need to check
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
-}
-
-void SelectionDAGLowering::visitPtrToInt(User &I) {
-  // What to do depends on the size of the integer and the size of the pointer.
-  // We can either truncate, zero extend, or no-op, accordingly.
-  SDValue N = getValue(I.getOperand(0));
-  EVT SrcVT = N.getValueType();
-  EVT DestVT = TLI.getValueType(I.getType());
-  SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT);
-  setValue(&I, Result);
-}
-
-void SelectionDAGLowering::visitIntToPtr(User &I) {
-  // What to do depends on the size of the integer and the size of the pointer.
-  // We can either truncate, zero extend, or no-op, accordingly.
-  SDValue N = getValue(I.getOperand(0));
-  EVT SrcVT = N.getValueType();
-  EVT DestVT = TLI.getValueType(I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
-}
-
-void SelectionDAGLowering::visitBitCast(User &I) {
-  SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = TLI.getValueType(I.getType());
-
-  // BitCast assures us that source and destination are the same size so this
-  // is either a BIT_CONVERT or a no-op.
-  if (DestVT != N.getValueType())
-    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
-                             DestVT, N)); // convert types
-  else
-    setValue(&I, N); // noop cast.
-}
-
-void SelectionDAGLowering::visitInsertElement(User &I) {
-  SDValue InVec = getValue(I.getOperand(0));
-  SDValue InVal = getValue(I.getOperand(1));
-  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
-                                TLI.getPointerTy(),
-                                getValue(I.getOperand(2)));
-
-  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
-                           TLI.getValueType(I.getType()),
-                           InVec, InVal, InIdx));
-}
-
-void SelectionDAGLowering::visitExtractElement(User &I) {
-  SDValue InVec = getValue(I.getOperand(0));
-  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
-                                TLI.getPointerTy(),
-                                getValue(I.getOperand(1)));
-  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                           TLI.getValueType(I.getType()), InVec, InIdx));
-}
-
-
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
-  unsigned MaskNumElts = Mask.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i)
-    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
-      return false;
-  return true;
-}
-
-void SelectionDAGLowering::visitShuffleVector(User &I) {
-  SmallVector<int, 8> Mask;
-  SDValue Src1 = getValue(I.getOperand(0));
-  SDValue Src2 = getValue(I.getOperand(1));
-
-  // Convert the ConstantVector mask operand into an array of ints, with -1
-  // representing undef values.
-  SmallVector<Constant*, 8> MaskElts;
-  cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), 
-                                                     MaskElts);
-  unsigned MaskNumElts = MaskElts.size();
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (isa<UndefValue>(MaskElts[i]))
-      Mask.push_back(-1);
-    else
-      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
-  }
-  
-  EVT VT = TLI.getValueType(I.getType());
-  EVT SrcVT = Src1.getValueType();
-  unsigned SrcNumElts = SrcVT.getVectorNumElements();
-
-  if (SrcNumElts == MaskNumElts) {
-    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
-                                      &Mask[0]));
-    return;
-  }
-
-  // Normalize the shuffle vector since mask and vector length don't match.
-  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
-    // Mask is longer than the source vectors and is a multiple of the source
-    // vectors.  We can use concatenate vector to make the mask and vectors
-    // lengths match.
-    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
-      // The shuffle is concatenating two vectors together.
-      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
-                               VT, Src1, Src2));
-      return;
-    }
-
-    // Pad both vectors with undefs to make them the same length as the mask.
-    unsigned NumConcat = MaskNumElts / SrcNumElts;
-    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
-    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
-    SDValue UndefVal = DAG.getUNDEF(SrcVT);
-
-    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
-    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
-    MOps1[0] = Src1;
-    MOps2[0] = Src2;
-    
-    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 
-                                                  getCurDebugLoc(), VT, 
-                                                  &MOps1[0], NumConcat);
-    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurDebugLoc(), VT, 
-                                                  &MOps2[0], NumConcat);
-
-    // Readjust mask for new input vector length.
-    SmallVector<int, 8> MappedOps;
-    for (unsigned i = 0; i != MaskNumElts; ++i) {
-      int Idx = Mask[i];
-      if (Idx < (int)SrcNumElts)
-        MappedOps.push_back(Idx);
-      else
-        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
-    }
-    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 
-                                      &MappedOps[0]));
-    return;
-  }
-
-  if (SrcNumElts > MaskNumElts) {
-    // Analyze the access pattern of the vector to see if we can extract
-    // two subvectors and do the shuffle. The analysis is done by calculating
-    // the range of elements the mask access on both vectors.
-    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
-    int MaxRange[2] = {-1, -1};
-
-    for (unsigned i = 0; i != MaskNumElts; ++i) {
-      int Idx = Mask[i];
-      int Input = 0;
-      if (Idx < 0)
-        continue;
-      
-      if (Idx >= (int)SrcNumElts) {
-        Input = 1;
-        Idx -= SrcNumElts;
-      }
-      if (Idx > MaxRange[Input])
-        MaxRange[Input] = Idx;
-      if (Idx < MinRange[Input])
-        MinRange[Input] = Idx;
-    }
-
-    // Check if the access is smaller than the vector size and can we find
-    // a reasonable extract index.
-    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not Extract.
-    int StartIdx[2];  // StartIdx to extract from
-    for (int Input=0; Input < 2; ++Input) {
-      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
-        RangeUse[Input] = 0; // Unused
-        StartIdx[Input] = 0;
-      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
-        // Fits within range but we should see if we can find a good
-        // start index that is a multiple of the mask length.
-        if (MaxRange[Input] < (int)MaskNumElts) {
-          RangeUse[Input] = 1; // Extract from beginning of the vector
-          StartIdx[Input] = 0;
-        } else {
-          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
-          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
-              StartIdx[Input] + MaskNumElts < SrcNumElts)
-            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
-        }
-      }
-    }
-
-    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
-      setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
-      return;
-    }
-    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
-      // Extract appropriate subvector and generate a vector shuffle
-      for (int Input=0; Input < 2; ++Input) {
-        SDValue& Src = Input == 0 ? Src1 : Src2;
-        if (RangeUse[Input] == 0) {
-          Src = DAG.getUNDEF(VT);
-        } else {
-          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
-                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
-        }
-      }
-      // Calculate new mask.
-      SmallVector<int, 8> MappedOps;
-      for (unsigned i = 0; i != MaskNumElts; ++i) {
-        int Idx = Mask[i];
-        if (Idx < 0)
-          MappedOps.push_back(Idx);
-        else if (Idx < (int)SrcNumElts)
-          MappedOps.push_back(Idx - StartIdx[0]);
-        else
-          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
-      }
-      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
-                                        &MappedOps[0]));
-      return;
-    }
-  }
-
-  // We can't use either concat vectors or extract subvectors so fall back to
-  // replacing the shuffle with extract and build vector.
-  // to insert and build vector.
-  EVT EltVT = VT.getVectorElementType();
-  EVT PtrVT = TLI.getPointerTy();
-  SmallVector<SDValue,8> Ops;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (Mask[i] < 0) {
-      Ops.push_back(DAG.getUNDEF(EltVT));
-    } else {
-      int Idx = Mask[i];
-      if (Idx < (int)SrcNumElts)
-        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                                  EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
-      else
-        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                                  EltVT, Src2,
-                                  DAG.getConstant(Idx - SrcNumElts, PtrVT)));
-    }
-  }
-  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
-                           VT, &Ops[0], Ops.size()));
-}
-
-void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
-  const Value *Op0 = I.getOperand(0);
-  const Value *Op1 = I.getOperand(1);
-  const Type *AggTy = I.getType();
-  const Type *ValTy = Op1->getType();
-  bool IntoUndef = isa<UndefValue>(Op0);
-  bool FromUndef = isa<UndefValue>(Op1);
-
-  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
-                                            I.idx_begin(), I.idx_end());
-
-  SmallVector<EVT, 4> AggValueVTs;
-  ComputeValueVTs(TLI, AggTy, AggValueVTs);
-  SmallVector<EVT, 4> ValValueVTs;
-  ComputeValueVTs(TLI, ValTy, ValValueVTs);
-
-  unsigned NumAggValues = AggValueVTs.size();
-  unsigned NumValValues = ValValueVTs.size();
-  SmallVector<SDValue, 4> Values(NumAggValues);
-
-  SDValue Agg = getValue(Op0);
-  SDValue Val = getValue(Op1);
-  unsigned i = 0;
-  // Copy the beginning value(s) from the original aggregate.
-  for (; i != LinearIndex; ++i)
-    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
-                SDValue(Agg.getNode(), Agg.getResNo() + i);
-  // Copy values from the inserted value(s).
-  for (; i != LinearIndex + NumValValues; ++i)
-    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
-                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
-  // Copy remaining value(s) from the original aggregate.
-  for (; i != NumAggValues; ++i)
-    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
-                SDValue(Agg.getNode(), Agg.getResNo() + i);
-
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
-                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
-                           &Values[0], NumAggValues));
-}
-
-void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) {
-  const Value *Op0 = I.getOperand(0);
-  const Type *AggTy = Op0->getType();
-  const Type *ValTy = I.getType();
-  bool OutOfUndef = isa<UndefValue>(Op0);
-
-  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
-                                            I.idx_begin(), I.idx_end());
-
-  SmallVector<EVT, 4> ValValueVTs;
-  ComputeValueVTs(TLI, ValTy, ValValueVTs);
-
-  unsigned NumValValues = ValValueVTs.size();
-  SmallVector<SDValue, 4> Values(NumValValues);
-
-  SDValue Agg = getValue(Op0);
-  // Copy out the selected value(s).
-  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
-    Values[i - LinearIndex] =
-      OutOfUndef ?
-        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
-        SDValue(Agg.getNode(), Agg.getResNo() + i);
-
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
-                           DAG.getVTList(&ValValueVTs[0], NumValValues),
-                           &Values[0], NumValValues));
-}
-
-
-void SelectionDAGLowering::visitGetElementPtr(User &I) {
-  SDValue N = getValue(I.getOperand(0));
-  const Type *Ty = I.getOperand(0)->getType();
-
-  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
-       OI != E; ++OI) {
-    Value *Idx = *OI;
-    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
-      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
-      if (Field) {
-        // N = N + Offset
-        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
-        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
-                        DAG.getIntPtrConstant(Offset));
-      }
-      Ty = StTy->getElementType(Field);
-    } else {
-      Ty = cast<SequentialType>(Ty)->getElementType();
-
-      // If this is a constant subscript, handle it quickly.
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
-        if (CI->getZExtValue() == 0) continue;
-        uint64_t Offs =
-            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
-        SDValue OffsVal;
-        EVT PTy = TLI.getPointerTy();
-        unsigned PtrBits = PTy.getSizeInBits();
-        if (PtrBits < 64) {
-          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                                TLI.getPointerTy(),
-                                DAG.getConstant(Offs, MVT::i64));
-        } else
-          OffsVal = DAG.getIntPtrConstant(Offs);
-        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
-                        OffsVal);
-        continue;
-      }
-
-      // N = N + Idx * ElementSize;
-      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
-                                TD->getTypeAllocSize(Ty));
-      SDValue IdxN = getValue(Idx);
-
-      // If the index is smaller or larger than intptr_t, truncate or extend
-      // it.
-      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
-
-      // If this is a multiply by a power of two, turn it into a shl
-      // immediately.  This is a very common case.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          unsigned Amt = ElementSize.logBase2();
-          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
-                             N.getValueType(), IdxN,
-                             DAG.getConstant(Amt, TLI.getPointerTy()));
-        } else {
-          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
-          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
-                             N.getValueType(), IdxN, Scale);
-        }
-      }
-
-      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
-                      N.getValueType(), N, IdxN);
-    }
-  }
-  setValue(&I, N);
-}
-
-void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
-  // If this is a fixed sized alloca in the entry block of the function,
-  // allocate it statically on the stack.
-  if (FuncInfo.StaticAllocaMap.count(&I))
-    return;   // getValue will auto-populate this.
-
-  const Type *Ty = I.getAllocatedType();
-  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
-  unsigned Align =
-    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
-             I.getAlignment());
-
-  SDValue AllocSize = getValue(I.getArraySize());
-  
-  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
-                          AllocSize,
-                          DAG.getConstant(TySize, AllocSize.getValueType()));
-  
-  
-  
-  EVT IntPtr = TLI.getPointerTy();
-  AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
-
-  // Handle alignment.  If the requested alignment is less than or equal to
-  // the stack alignment, ignore it.  If the size is greater than or equal to
-  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
-  unsigned StackAlign =
-    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
-  if (Align <= StackAlign)
-    Align = 0;
-
-  // Round the size of the allocation up to the stack alignment size
-  // by add SA-1 to the size.
-  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
-                          AllocSize.getValueType(), AllocSize,
-                          DAG.getIntPtrConstant(StackAlign-1));
-  // Mask out the low bits for alignment purposes.
-  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
-                          AllocSize.getValueType(), AllocSize,
-                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
-
-  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
-  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
-  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
-                            VTs, Ops, 3);
-  setValue(&I, DSA);
-  DAG.setRoot(DSA.getValue(1));
-
-  // Inform the Frame Information that we have just allocated a variable-sized
-  // object.
-  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
-}
-
-void SelectionDAGLowering::visitLoad(LoadInst &I) {
-  const Value *SV = I.getOperand(0);
-  SDValue Ptr = getValue(SV);
-
-  const Type *Ty = I.getType();
-  bool isVolatile = I.isVolatile();
-  unsigned Alignment = I.getAlignment();
-
-  SmallVector<EVT, 4> ValueVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
-  unsigned NumValues = ValueVTs.size();
-  if (NumValues == 0)
-    return;
-
-  SDValue Root;
-  bool ConstantMemory = false;
-  if (I.isVolatile())
-    // Serialize volatile loads with other side effects.
-    Root = getRoot();
-  else if (AA->pointsToConstantMemory(SV)) {
-    // Do not serialize (non-volatile) loads of constant memory with anything.
-    Root = DAG.getEntryNode();
-    ConstantMemory = true;
-  } else {
-    // Do not serialize non-volatile loads against each other.
-    Root = DAG.getRoot();
-  }
-
-  SmallVector<SDValue, 4> Values(NumValues);
-  SmallVector<SDValue, 4> Chains(NumValues);
-  EVT PtrVT = Ptr.getValueType();
-  for (unsigned i = 0; i != NumValues; ++i) {
-    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
-                            DAG.getNode(ISD::ADD, getCurDebugLoc(),
-                                        PtrVT, Ptr,
-                                        DAG.getConstant(Offsets[i], PtrVT)),
-                            SV, Offsets[i], isVolatile, Alignment);
-    Values[i] = L;
-    Chains[i] = L.getValue(1);
-  }
-
-  if (!ConstantMemory) {
-    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                                  MVT::Other,
-                                  &Chains[0], NumValues);
-    if (isVolatile)
-      DAG.setRoot(Chain);
-    else
-      PendingLoads.push_back(Chain);
-  }
-
-  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
-                           DAG.getVTList(&ValueVTs[0], NumValues),
-                           &Values[0], NumValues));
-}
-
-
-void SelectionDAGLowering::visitStore(StoreInst &I) {
-  Value *SrcV = I.getOperand(0);
-  Value *PtrV = I.getOperand(1);
-
-  SmallVector<EVT, 4> ValueVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
-  unsigned NumValues = ValueVTs.size();
-  if (NumValues == 0)
-    return;
-
-  // Get the lowered operands. Note that we do this after
-  // checking if NumResults is zero, because with zero results
-  // the operands won't have values in the map.
-  SDValue Src = getValue(SrcV);
-  SDValue Ptr = getValue(PtrV);
-
-  SDValue Root = getRoot();
-  SmallVector<SDValue, 4> Chains(NumValues);
-  EVT PtrVT = Ptr.getValueType();
-  bool isVolatile = I.isVolatile();
-  unsigned Alignment = I.getAlignment();
-  for (unsigned i = 0; i != NumValues; ++i)
-    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
-                             SDValue(Src.getNode(), Src.getResNo() + i),
-                             DAG.getNode(ISD::ADD, getCurDebugLoc(),
-                                         PtrVT, Ptr,
-                                         DAG.getConstant(Offsets[i], PtrVT)),
-                             PtrV, Offsets[i], isVolatile, Alignment);
-
-  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                          MVT::Other, &Chains[0], NumValues));
-}
-
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
-                                                unsigned Intrinsic) {
-  bool HasChain = !I.doesNotAccessMemory();
-  bool OnlyLoad = HasChain && I.onlyReadsMemory();
-
-  // Build the operand list.
-  SmallVector<SDValue, 8> Ops;
-  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
-    if (OnlyLoad) {
-      // We don't need to serialize loads against other loads.
-      Ops.push_back(DAG.getRoot());
-    } else {
-      Ops.push_back(getRoot());
-    }
-  }
-
-  // Info is set by getTgtMemInstrinsic
-  TargetLowering::IntrinsicInfo Info;
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
-
-  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
-  if (!IsTgtIntrinsic)
-    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
-
-  // Add all operands of the call to the operand list.
-  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
-    SDValue Op = getValue(I.getOperand(i));
-    assert(TLI.isTypeLegal(Op.getValueType()) &&
-           "Intrinsic uses a non-legal type?");
-    Ops.push_back(Op);
-  }
-
-  SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
-  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
-    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
-           "Intrinsic uses a non-legal type?");
-  }
-#endif // NDEBUG
-  if (HasChain)
-    ValueVTs.push_back(MVT::Other);
-
-  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
-
-  // Create the node.
-  SDValue Result;
-  if (IsTgtIntrinsic) {
-    // This is target intrinsic that touches memory
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
-                                     VTs, &Ops[0], Ops.size(),
-                                     Info.memVT, Info.ptrVal, Info.offset,
-                                     Info.align, Info.vol,
-                                     Info.readMem, Info.writeMem);
-  }
-  else if (!HasChain)
-    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
-                         VTs, &Ops[0], Ops.size());
-  else if (I.getType() != Type::getVoidTy(*DAG.getContext()))
-    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
-                         VTs, &Ops[0], Ops.size());
-  else
-    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
-                         VTs, &Ops[0], Ops.size());
-
-  if (HasChain) {
-    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
-    if (OnlyLoad)
-      PendingLoads.push_back(Chain);
-    else
-      DAG.setRoot(Chain);
-  }
-  if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
-    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
-      EVT VT = TLI.getValueType(PTy);
-      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
-    }
-    setValue(&I, Result);
-  }
-}
-
-/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-static GlobalVariable *ExtractTypeInfo(Value *V) {
-  V = V->stripPointerCasts();
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
-  assert ((GV || isa<ConstantPointerNull>(V)) &&
-          "TypeInfo must be a global variable or NULL");
-  return GV;
-}
-
-namespace llvm {
-
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
-                  MachineBasicBlock *MBB) {
-  // Inform the MachineModuleInfo of the personality for this landing pad.
-  ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
-  assert(CE->getOpcode() == Instruction::BitCast &&
-         isa<Function>(CE->getOperand(0)) &&
-         "Personality should be a function");
-  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
-
-  // Gather all the type infos for this landing pad and pass them along to
-  // MachineModuleInfo.
-  std::vector<GlobalVariable *> TyInfo;
-  unsigned N = I.getNumOperands();
-
-  for (unsigned i = N - 1; i > 2; --i) {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
-      unsigned FilterLength = CI->getZExtValue();
-      unsigned FirstCatch = i + FilterLength + !FilterLength;
-      assert (FirstCatch <= N && "Invalid filter length");
-
-      if (FirstCatch < N) {
-        TyInfo.reserve(N - FirstCatch);
-        for (unsigned j = FirstCatch; j < N; ++j)
-          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
-        MMI->addCatchTypeInfo(MBB, TyInfo);
-        TyInfo.clear();
-      }
-
-      if (!FilterLength) {
-        // Cleanup.
-        MMI->addCleanup(MBB);
-      } else {
-        // Filter.
-        TyInfo.reserve(FilterLength - 1);
-        for (unsigned j = i + 1; j < FirstCatch; ++j)
-          TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
-        MMI->addFilterTypeInfo(MBB, TyInfo);
-        TyInfo.clear();
-      }
-
-      N = i;
-    }
-  }
-
-  if (N > 3) {
-    TyInfo.reserve(N - 3);
-    for (unsigned j = 3; j < N; ++j)
-      TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
-    MMI->addCatchTypeInfo(MBB, TyInfo);
-  }
-}
-
-}
-
-/// GetSignificand - Get the significand and build it into a floating-point
-/// number with exponent of 1:
-///
-///   Op = (Op & 0x007fffff) | 0x3f800000;
-///
-/// where Op is the hexidecimal representation of floating point value.
-static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
-  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
-                           DAG.getConstant(0x007fffff, MVT::i32));
-  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
-                           DAG.getConstant(0x3f800000, MVT::i32));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
-}
-
-/// GetExponent - Get the exponent:
-///
-///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
-///
-/// where Op is the hexidecimal representation of floating point value.
-static SDValue
-GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
-            DebugLoc dl) {
-  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
-                           DAG.getConstant(0x7f800000, MVT::i32));
-  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
-                           DAG.getConstant(23, TLI.getPointerTy()));
-  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
-                           DAG.getConstant(127, MVT::i32));
-  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
-}
-
-/// getF32Constant - Get 32-bit floating point constant.
-static SDValue
-getF32Constant(SelectionDAG &DAG, unsigned Flt) {
-  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
-}
-
-/// Inlined utility function to implement binary input atomic intrinsics for
-/// visitIntrinsicCall: I is a call instruction
-///                     Op is the associated NodeType for I
-const char *
-SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
-  SDValue Root = getRoot();
-  SDValue L =
-    DAG.getAtomic(Op, getCurDebugLoc(),
-                  getValue(I.getOperand(2)).getValueType().getSimpleVT(),
-                  Root,
-                  getValue(I.getOperand(1)),
-                  getValue(I.getOperand(2)),
-                  I.getOperand(1));
-  setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
-  return 0;
-}
-
-// implVisitAluOverflow - Lower arithmetic overflow instrinsics.
-const char *
-SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
-  SDValue Op1 = getValue(I.getOperand(1));
-  SDValue Op2 = getValue(I.getOperand(2));
-
-  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
-  SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
-
-  setValue(&I, Result);
-  return 0;
-}
-
-/// visitExp - Lower an exp intrinsic. Handles the special sequences for
-/// limited-precision mode.
-void
-SelectionDAGLowering::visitExp(CallInst &I) {
-  SDValue result;
-  DebugLoc dl = getCurDebugLoc();
-
-  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
-      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    SDValue Op = getValue(I.getOperand(1));
-
-    // Put the exponent in the right bit position for later addition to the
-    // final result:
-    //
-    //   #define LOG2OFe 1.4426950f
-    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
-    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
-                             getF32Constant(DAG, 0x3fb8aa3b));
-    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
-    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
-    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
-    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
-    //   IntegerPartOfX <<= 23;
-    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
-                                 DAG.getConstant(23, TLI.getPointerTy()));
-
-    if (LimitFloatPrecision <= 6) {
-      // For floating-point precision of 6:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.997535578f +
-      //       (0.735607626f + 0.252464424f * x) * x;
-      //
-      // error 0.0144103317, which is 6 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3e814304));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3f3c50c8));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
-
-      // Add the exponent into the result in integer domain.
-      SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                               TwoToFracPartOfX, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
-    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
-      // For floating-point precision of 12:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.999892986f +
-      //       (0.696457318f +
-      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
-      //
-      // 0.000107046256 error, which is 13 to 14 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3da235e3));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3e65b8f3));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f324b07));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
-
-      // Add the exponent into the result in integer domain.
-      SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                               TwoToFracPartOfX, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
-    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
-      // For floating-point precision of 18:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.999999982f +
-      //       (0.693148872f +
-      //         (0.240227044f +
-      //           (0.554906021e-1f +
-      //             (0.961591928e-2f +
-      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
-      //
-      // error 2.47208000*10^(-7), which is better than 18 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3924b03e));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3ab24b87));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3c1d8c17));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3d634a1d));
-      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
-      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
-                               getF32Constant(DAG, 0x3e75fe14));
-      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
-      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
-                                getF32Constant(DAG, 0x3f317234));
-      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
-      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
-                                getF32Constant(DAG, 0x3f800000));
-      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
-                                             MVT::i32, t13);
-
-      // Add the exponent into the result in integer domain.
-      SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                TwoToFracPartOfX, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
-    }
-  } else {
-    // No special expansion.
-    result = DAG.getNode(ISD::FEXP, dl,
-                         getValue(I.getOperand(1)).getValueType(),
-                         getValue(I.getOperand(1)));
-  }
-
-  setValue(&I, result);
-}
-
-/// visitLog - Lower a log intrinsic. Handles the special sequences for
-/// limited-precision mode.
-void
-SelectionDAGLowering::visitLog(CallInst &I) {
-  SDValue result;
-  DebugLoc dl = getCurDebugLoc();
-
-  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
-      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    SDValue Op = getValue(I.getOperand(1));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
-
-    // Scale the exponent by log(2) [0.69314718f].
-    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
-    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
-                                        getF32Constant(DAG, 0x3f317218));
-
-    // Get the significand and build it into a floating-point number with
-    // exponent of 1.
-    SDValue X = GetSignificand(DAG, Op1, dl);
-
-    if (LimitFloatPrecision <= 6) {
-      // For floating-point precision of 6:
-      //
-      //   LogofMantissa =
-      //     -1.1609546f +
-      //       (1.4034025f - 0.23903021f * x) * x;
-      //
-      // error 0.0034276066, which is better than 8 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbe74c456));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3fb3a2b1));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                                          getF32Constant(DAG, 0x3f949a29));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, LogOfMantissa);
-    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
-      // For floating-point precision of 12:
-      //
-      //   LogOfMantissa =
-      //     -1.7417939f +
-      //       (2.8212026f +
-      //         (-1.4699568f +
-      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
-      //
-      // error 0.000061011436, which is 14 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbd67b6d6));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3ee4f4b8));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3fbc278b));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x40348e95));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
-                                          getF32Constant(DAG, 0x3fdef31a));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, LogOfMantissa);
-    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
-      // For floating-point precision of 18:
-      //
-      //   LogOfMantissa =
-      //     -2.1072184f +
-      //       (4.2372794f +
-      //         (-3.7029485f +
-      //           (2.2781945f +
-      //             (-0.87823314f +
-      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
-      //
-      // error 0.0000023660568, which is better than 18 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbc91e5ac));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3e4350aa));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3f60d3e3));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x4011cdf0));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x406cfd1c));
-      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
-      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
-                               getF32Constant(DAG, 0x408797cb));
-      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
-      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
-                                          getF32Constant(DAG, 0x4006dcab));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, LogOfMantissa);
-    }
-  } else {
-    // No special expansion.
-    result = DAG.getNode(ISD::FLOG, dl,
-                         getValue(I.getOperand(1)).getValueType(),
-                         getValue(I.getOperand(1)));
-  }
-
-  setValue(&I, result);
-}
-
-/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
-/// limited-precision mode.
-void
-SelectionDAGLowering::visitLog2(CallInst &I) {
-  SDValue result;
-  DebugLoc dl = getCurDebugLoc();
-
-  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
-      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    SDValue Op = getValue(I.getOperand(1));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
-
-    // Get the exponent.
-    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
-
-    // Get the significand and build it into a floating-point number with
-    // exponent of 1.
-    SDValue X = GetSignificand(DAG, Op1, dl);
-
-    // Different possible minimax approximations of significand in
-    // floating-point for various degrees of accuracy over [1,2].
-    if (LimitFloatPrecision <= 6) {
-      // For floating-point precision of 6:
-      //
-      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
-      //
-      // error 0.0049451742, which is more than 7 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbeb08fe0));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x40019463));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                                           getF32Constant(DAG, 0x3fd6633d));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, Log2ofMantissa);
-    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
-      // For floating-point precision of 12:
-      //
-      //   Log2ofMantissa =
-      //     -2.51285454f +
-      //       (4.07009056f +
-      //         (-2.12067489f +
-      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
-      //
-      // error 0.0000876136000, which is better than 13 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbda7262e));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3f25280b));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x4007b923));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x40823e2f));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
-                                           getF32Constant(DAG, 0x4020d29c));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, Log2ofMantissa);
-    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
-      // For floating-point precision of 18:
-      //
-      //   Log2ofMantissa =
-      //     -3.0400495f +
-      //       (6.1129976f +
-      //         (-5.3420409f +
-      //           (3.2865683f +
-      //             (-1.2669343f +
-      //               (0.27515199f -
-      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
-      //
-      // error 0.0000018516, which is better than 18 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbcd2769e));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3e8ce0b9));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3fa22ae7));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x40525723));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x40aaf200));
-      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
-      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
-                               getF32Constant(DAG, 0x40c39dad));
-      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
-      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
-                                           getF32Constant(DAG, 0x4042902c));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, Log2ofMantissa);
-    }
-  } else {
-    // No special expansion.
-    result = DAG.getNode(ISD::FLOG2, dl,
-                         getValue(I.getOperand(1)).getValueType(),
-                         getValue(I.getOperand(1)));
-  }
-
-  setValue(&I, result);
-}
-
-/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
-/// limited-precision mode.
-void
-SelectionDAGLowering::visitLog10(CallInst &I) {
-  SDValue result;
-  DebugLoc dl = getCurDebugLoc();
-
-  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
-      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    SDValue Op = getValue(I.getOperand(1));
-    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
-
-    // Scale the exponent by log10(2) [0.30102999f].
-    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
-    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
-                                        getF32Constant(DAG, 0x3e9a209a));
-
-    // Get the significand and build it into a floating-point number with
-    // exponent of 1.
-    SDValue X = GetSignificand(DAG, Op1, dl);
-
-    if (LimitFloatPrecision <= 6) {
-      // For floating-point precision of 6:
-      //
-      //   Log10ofMantissa =
-      //     -0.50419619f +
-      //       (0.60948995f - 0.10380950f * x) * x;
-      //
-      // error 0.0014886165, which is 6 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0xbdd49a13));
-      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3f1c0789));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
-                                            getF32Constant(DAG, 0x3f011300));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, Log10ofMantissa);
-    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
-      // For floating-point precision of 12:
-      //
-      //   Log10ofMantissa =
-      //     -0.64831180f +
-      //       (0.91751397f +
-      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
-      //
-      // error 0.00019228036, which is better than 12 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3d431f31));
-      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3ea21fb2));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3f6ae232));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
-                                            getF32Constant(DAG, 0x3f25f7c3));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, Log10ofMantissa);
-    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
-      // For floating-point precision of 18:
-      //
-      //   Log10ofMantissa =
-      //     -0.84299375f +
-      //       (1.5327582f +
-      //         (-1.0688956f +
-      //           (0.49102474f +
-      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
-      //
-      // error 0.0000037995730, which is better than 18 bits
-      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3c5d51ce));
-      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
-                               getF32Constant(DAG, 0x3e00685a));
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3efb6798));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f88d192));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3fc4316c));
-      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
-      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
-                                            getF32Constant(DAG, 0x3f57ce70));
-
-      result = DAG.getNode(ISD::FADD, dl,
-                           MVT::f32, LogOfExponent, Log10ofMantissa);
-    }
-  } else {
-    // No special expansion.
-    result = DAG.getNode(ISD::FLOG10, dl,
-                         getValue(I.getOperand(1)).getValueType(),
-                         getValue(I.getOperand(1)));
-  }
-
-  setValue(&I, result);
-}
-
-/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
-/// limited-precision mode.
-void
-SelectionDAGLowering::visitExp2(CallInst &I) {
-  SDValue result;
-  DebugLoc dl = getCurDebugLoc();
-
-  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
-      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    SDValue Op = getValue(I.getOperand(1));
-
-    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
-
-    //   FractionalPartOfX = x - (float)IntegerPartOfX;
-    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
-    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
-
-    //   IntegerPartOfX <<= 23;
-    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
-                                 DAG.getConstant(23, TLI.getPointerTy()));
-
-    if (LimitFloatPrecision <= 6) {
-      // For floating-point precision of 6:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.997535578f +
-      //       (0.735607626f + 0.252464424f * x) * x;
-      //
-      // error 0.0144103317, which is 6 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3e814304));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3f3c50c8));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
-      SDValue TwoToFractionalPartOfX =
-        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           MVT::f32, TwoToFractionalPartOfX);
-    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
-      // For floating-point precision of 12:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.999892986f +
-      //       (0.696457318f +
-      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
-      //
-      // error 0.000107046256, which is 13 to 14 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3da235e3));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3e65b8f3));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f324b07));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
-      SDValue TwoToFractionalPartOfX =
-        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           MVT::f32, TwoToFractionalPartOfX);
-    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
-      // For floating-point precision of 18:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.999999982f +
-      //       (0.693148872f +
-      //         (0.240227044f +
-      //           (0.554906021e-1f +
-      //             (0.961591928e-2f +
-      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
-      // error 2.47208000*10^(-7), which is better than 18 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3924b03e));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3ab24b87));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3c1d8c17));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3d634a1d));
-      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
-      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
-                               getF32Constant(DAG, 0x3e75fe14));
-      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
-      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
-                                getF32Constant(DAG, 0x3f317234));
-      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
-      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
-                                getF32Constant(DAG, 0x3f800000));
-      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
-      SDValue TwoToFractionalPartOfX =
-        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           MVT::f32, TwoToFractionalPartOfX);
-    }
-  } else {
-    // No special expansion.
-    result = DAG.getNode(ISD::FEXP2, dl,
-                         getValue(I.getOperand(1)).getValueType(),
-                         getValue(I.getOperand(1)));
-  }
-
-  setValue(&I, result);
-}
-
-/// visitPow - Lower a pow intrinsic. Handles the special sequences for
-/// limited-precision mode with x == 10.0f.
-void
-SelectionDAGLowering::visitPow(CallInst &I) {
-  SDValue result;
-  Value *Val = I.getOperand(1);
-  DebugLoc dl = getCurDebugLoc();
-  bool IsExp10 = false;
-
-  if (getValue(Val).getValueType() == MVT::f32 &&
-      getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
-      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
-      if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
-        APFloat Ten(10.0f);
-        IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
-      }
-    }
-  }
-
-  if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
-    SDValue Op = getValue(I.getOperand(2));
-
-    // Put the exponent in the right bit position for later addition to the
-    // final result:
-    //
-    //   #define LOG2OF10 3.3219281f
-    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
-    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
-                             getF32Constant(DAG, 0x40549a78));
-    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
-    //   FractionalPartOfX = x - (float)IntegerPartOfX;
-    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
-    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
-    //   IntegerPartOfX <<= 23;
-    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
-                                 DAG.getConstant(23, TLI.getPointerTy()));
-
-    if (LimitFloatPrecision <= 6) {
-      // For floating-point precision of 6:
-      //
-      //   twoToFractionalPartOfX =
-      //     0.997535578f +
-      //       (0.735607626f + 0.252464424f * x) * x;
-      //
-      // error 0.0144103317, which is 6 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3e814304));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3f3c50c8));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f7f5e7e));
-      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
-      SDValue TwoToFractionalPartOfX =
-        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           MVT::f32, TwoToFractionalPartOfX);
-    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
-      // For floating-point precision of 12:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.999892986f +
-      //       (0.696457318f +
-      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
-      //
-      // error 0.000107046256, which is 13 to 14 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3da235e3));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3e65b8f3));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3f324b07));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3f7ff8fd));
-      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
-      SDValue TwoToFractionalPartOfX =
-        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           MVT::f32, TwoToFractionalPartOfX);
-    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
-      // For floating-point precision of 18:
-      //
-      //   TwoToFractionalPartOfX =
-      //     0.999999982f +
-      //       (0.693148872f +
-      //         (0.240227044f +
-      //           (0.554906021e-1f +
-      //             (0.961591928e-2f +
-      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
-      // error 2.47208000*10^(-7), which is better than 18 bits
-      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
-                               getF32Constant(DAG, 0x3924b03e));
-      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
-                               getF32Constant(DAG, 0x3ab24b87));
-      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
-      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
-                               getF32Constant(DAG, 0x3c1d8c17));
-      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
-      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
-                               getF32Constant(DAG, 0x3d634a1d));
-      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
-      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
-                               getF32Constant(DAG, 0x3e75fe14));
-      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
-      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
-                                getF32Constant(DAG, 0x3f317234));
-      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
-      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
-                                getF32Constant(DAG, 0x3f800000));
-      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
-      SDValue TwoToFractionalPartOfX =
-        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
-      result = DAG.getNode(ISD::BIT_CONVERT, dl,
-                           MVT::f32, TwoToFractionalPartOfX);
-    }
-  } else {
-    // No special expansion.
-    result = DAG.getNode(ISD::FPOW, dl,
-                         getValue(I.getOperand(1)).getValueType(),
-                         getValue(I.getOperand(1)),
-                         getValue(I.getOperand(2)));
-  }
-
-  setValue(&I, result);
-}
-
-/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
-/// we want to emit this as a call to a named external function, return the name
-/// otherwise lower it and return null.
-const char *
-SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
-  DebugLoc dl = getCurDebugLoc();
-  switch (Intrinsic) {
-  default:
-    // By default, turn this into a target intrinsic node.
-    visitTargetIntrinsic(I, Intrinsic);
-    return 0;
-  case Intrinsic::vastart:  visitVAStart(I); return 0;
-  case Intrinsic::vaend:    visitVAEnd(I); return 0;
-  case Intrinsic::vacopy:   visitVACopy(I); return 0;
-  case Intrinsic::returnaddress:
-    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
-                             getValue(I.getOperand(1))));
-    return 0;
-  case Intrinsic::frameaddress:
-    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
-                             getValue(I.getOperand(1))));
-    return 0;
-  case Intrinsic::setjmp:
-    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
-    break;
-  case Intrinsic::longjmp:
-    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
-    break;
-  case Intrinsic::memcpy: {
-    SDValue Op1 = getValue(I.getOperand(1));
-    SDValue Op2 = getValue(I.getOperand(2));
-    SDValue Op3 = getValue(I.getOperand(3));
-    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
-                              I.getOperand(1), 0, I.getOperand(2), 0));
-    return 0;
-  }
-  case Intrinsic::memset: {
-    SDValue Op1 = getValue(I.getOperand(1));
-    SDValue Op2 = getValue(I.getOperand(2));
-    SDValue Op3 = getValue(I.getOperand(3));
-    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
-                              I.getOperand(1), 0));
-    return 0;
-  }
-  case Intrinsic::memmove: {
-    SDValue Op1 = getValue(I.getOperand(1));
-    SDValue Op2 = getValue(I.getOperand(2));
-    SDValue Op3 = getValue(I.getOperand(3));
-    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
-
-    // If the source and destination are known to not be aliases, we can
-    // lower memmove as memcpy.
-    uint64_t Size = -1ULL;
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
-      Size = C->getZExtValue();
-    if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
-        AliasAnalysis::NoAlias) {
-      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
-                                I.getOperand(1), 0, I.getOperand(2), 0));
-      return 0;
-    }
-
-    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
-                               I.getOperand(1), 0, I.getOperand(2), 0));
-    return 0;
-  }
-  case Intrinsic::dbg_stoppoint: 
-  case Intrinsic::dbg_region_start:
-  case Intrinsic::dbg_region_end:
-  case Intrinsic::dbg_func_start:
-    // FIXME - Remove this instructions once the dust settles.
-    return 0;
-  case Intrinsic::dbg_declare: {
-    if (OptLevel != CodeGenOpt::None) 
-      // FIXME: Variable debug info is not supported here.
-      return 0;
-    DwarfWriter *DW = DAG.getDwarfWriter();
-    if (!DW)
-      return 0;
-    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
-    if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None))
-      return 0;
-
-    MDNode *Variable = DI.getVariable();
-    Value *Address = DI.getAddress();
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
-      Address = BCI->getOperand(0);
-    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
-    // Don't handle byval struct arguments or VLAs, for example.
-    if (!AI)
-      return 0;
-    DenseMap<const AllocaInst*, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
-    if (SI == FuncInfo.StaticAllocaMap.end()) 
-      return 0; // VLAs.
-    int FI = SI->second;
-
-    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-    if (MMI) {
-      MetadataContext &TheMetadata = 
-        DI.getParent()->getContext().getMetadata();
-      unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
-      MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
-      MMI->setVariableDbgInfo(Variable, FI, Dbg);
-    }
-    return 0;
-  }
-  case Intrinsic::eh_exception: {
-    // Insert the EXCEPTIONADDR instruction.
-    assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
-    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
-    SDValue Ops[1];
-    Ops[0] = DAG.getRoot();
-    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
-    setValue(&I, Op);
-    DAG.setRoot(Op.getValue(1));
-    return 0;
-  }
-
-  case Intrinsic::eh_selector: {
-    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-
-    if (CurMBB->isLandingPad())
-      AddCatchInfo(I, MMI, CurMBB);
-    else {
-#ifndef NDEBUG
-      FuncInfo.CatchInfoLost.insert(&I);
-#endif
-      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
-      unsigned Reg = TLI.getExceptionSelectorRegister();
-      if (Reg) CurMBB->addLiveIn(Reg);
-    }
-
-    // Insert the EHSELECTION instruction.
-    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
-    SDValue Ops[2];
-    Ops[0] = getValue(I.getOperand(1));
-    Ops[1] = getRoot();
-    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
-
-    DAG.setRoot(Op.getValue(1));
-
-    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
-    return 0;
-  }
-
-  case Intrinsic::eh_typeid_for: {
-    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-
-    if (MMI) {
-      // Find the type id for the given typeinfo.
-      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
-
-      unsigned TypeID = MMI->getTypeIDFor(GV);
-      setValue(&I, DAG.getConstant(TypeID, MVT::i32));
-    } else {
-      // Return something different to eh_selector.
-      setValue(&I, DAG.getConstant(1, MVT::i32));
-    }
-
-    return 0;
-  }
-
-  case Intrinsic::eh_return_i32:
-  case Intrinsic::eh_return_i64:
-    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
-      MMI->setCallsEHReturn(true);
-      DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
-                              MVT::Other,
-                              getControlRoot(),
-                              getValue(I.getOperand(1)),
-                              getValue(I.getOperand(2))));
-    } else {
-      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
-    }
-
-    return 0;
-  case Intrinsic::eh_unwind_init:
-    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
-      MMI->setCallsUnwindInit(true);
-    }
-
-    return 0;
-
-  case Intrinsic::eh_dwarf_cfa: {
-    EVT VT = getValue(I.getOperand(1)).getValueType();
-    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
-                                        TLI.getPointerTy());
-
-    SDValue Offset = DAG.getNode(ISD::ADD, dl,
-                                 TLI.getPointerTy(),
-                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
-                                             TLI.getPointerTy()),
-                                 CfaArg);
-    setValue(&I, DAG.getNode(ISD::ADD, dl,
-                             TLI.getPointerTy(),
-                             DAG.getNode(ISD::FRAMEADDR, dl,
-                                         TLI.getPointerTy(),
-                                         DAG.getConstant(0,
-                                                         TLI.getPointerTy())),
-                             Offset));
-    return 0;
-  }
-  case Intrinsic::convertff:
-  case Intrinsic::convertfsi:
-  case Intrinsic::convertfui:
-  case Intrinsic::convertsif:
-  case Intrinsic::convertuif:
-  case Intrinsic::convertss:
-  case Intrinsic::convertsu:
-  case Intrinsic::convertus:
-  case Intrinsic::convertuu: {
-    ISD::CvtCode Code = ISD::CVT_INVALID;
-    switch (Intrinsic) {
-    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
-    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
-    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
-    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
-    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
-    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
-    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
-    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
-    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
-    }
-    EVT DestVT = TLI.getValueType(I.getType());
-    Value* Op1 = I.getOperand(1);
-    setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
-                                DAG.getValueType(DestVT),
-                                DAG.getValueType(getValue(Op1).getValueType()),
-                                getValue(I.getOperand(2)),
-                                getValue(I.getOperand(3)),
-                                Code));
-    return 0;
-  }
-
-  case Intrinsic::sqrt:
-    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
-                             getValue(I.getOperand(1)).getValueType(),
-                             getValue(I.getOperand(1))));
-    return 0;
-  case Intrinsic::powi:
-    setValue(&I, DAG.getNode(ISD::FPOWI, dl,
-                             getValue(I.getOperand(1)).getValueType(),
-                             getValue(I.getOperand(1)),
-                             getValue(I.getOperand(2))));
-    return 0;
-  case Intrinsic::sin:
-    setValue(&I, DAG.getNode(ISD::FSIN, dl,
-                             getValue(I.getOperand(1)).getValueType(),
-                             getValue(I.getOperand(1))));
-    return 0;
-  case Intrinsic::cos:
-    setValue(&I, DAG.getNode(ISD::FCOS, dl,
-                             getValue(I.getOperand(1)).getValueType(),
-                             getValue(I.getOperand(1))));
-    return 0;
-  case Intrinsic::log:
-    visitLog(I);
-    return 0;
-  case Intrinsic::log2:
-    visitLog2(I);
-    return 0;
-  case Intrinsic::log10:
-    visitLog10(I);
-    return 0;
-  case Intrinsic::exp:
-    visitExp(I);
-    return 0;
-  case Intrinsic::exp2:
-    visitExp2(I);
-    return 0;
-  case Intrinsic::pow:
-    visitPow(I);
-    return 0;
-  case Intrinsic::pcmarker: {
-    SDValue Tmp = getValue(I.getOperand(1));
-    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
-    return 0;
-  }
-  case Intrinsic::readcyclecounter: {
-    SDValue Op = getRoot();
-    SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl,
-                              DAG.getVTList(MVT::i64, MVT::Other),
-                              &Op, 1);
-    setValue(&I, Tmp);
-    DAG.setRoot(Tmp.getValue(1));
-    return 0;
-  }
-  case Intrinsic::bswap:
-    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
-                             getValue(I.getOperand(1)).getValueType(),
-                             getValue(I.getOperand(1))));
-    return 0;
-  case Intrinsic::cttz: {
-    SDValue Arg = getValue(I.getOperand(1));
-    EVT Ty = Arg.getValueType();
-    SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
-    setValue(&I, result);
-    return 0;
-  }
-  case Intrinsic::ctlz: {
-    SDValue Arg = getValue(I.getOperand(1));
-    EVT Ty = Arg.getValueType();
-    SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
-    setValue(&I, result);
-    return 0;
-  }
-  case Intrinsic::ctpop: {
-    SDValue Arg = getValue(I.getOperand(1));
-    EVT Ty = Arg.getValueType();
-    SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
-    setValue(&I, result);
-    return 0;
-  }
-  case Intrinsic::stacksave: {
-    SDValue Op = getRoot();
-    SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl,
-              DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
-    setValue(&I, Tmp);
-    DAG.setRoot(Tmp.getValue(1));
-    return 0;
-  }
-  case Intrinsic::stackrestore: {
-    SDValue Tmp = getValue(I.getOperand(1));
-    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp));
-    return 0;
-  }
-  case Intrinsic::stackprotector: {
-    // Emit code into the DAG to store the stack guard onto the stack.
-    MachineFunction &MF = DAG.getMachineFunction();
-    MachineFrameInfo *MFI = MF.getFrameInfo();
-    EVT PtrTy = TLI.getPointerTy();
-
-    SDValue Src = getValue(I.getOperand(1));   // The guard's value.
-    AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
-
-    int FI = FuncInfo.StaticAllocaMap[Slot];
-    MFI->setStackProtectorIndex(FI);
-
-    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
-
-    // Store the stack protector onto the stack.
-    SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
-                                  PseudoSourceValue::getFixedStack(FI),
-                                  0, true);
-    setValue(&I, Result);
-    DAG.setRoot(Result);
-    return 0;
-  }
-  case Intrinsic::objectsize: {
-    // If we don't know by now, we're never going to know.
-    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
-
-    assert(CI && "Non-constant type in __builtin_object_size?");
-
-    SDValue Arg = getValue(I.getOperand(0));
-    EVT Ty = Arg.getValueType();
-
-    if (CI->getZExtValue() < 2)
-      setValue(&I, DAG.getConstant(-1ULL, Ty));
-    else
-      setValue(&I, DAG.getConstant(0, Ty));
-    return 0;
-  }
-  case Intrinsic::var_annotation:
-    // Discard annotate attributes
-    return 0;
-
-  case Intrinsic::init_trampoline: {
-    const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
-
-    SDValue Ops[6];
-    Ops[0] = getRoot();
-    Ops[1] = getValue(I.getOperand(1));
-    Ops[2] = getValue(I.getOperand(2));
-    Ops[3] = getValue(I.getOperand(3));
-    Ops[4] = DAG.getSrcValue(I.getOperand(1));
-    Ops[5] = DAG.getSrcValue(F);
-
-    SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl,
-                              DAG.getVTList(TLI.getPointerTy(), MVT::Other),
-                              Ops, 6);
-
-    setValue(&I, Tmp);
-    DAG.setRoot(Tmp.getValue(1));
-    return 0;
-  }
-
-  case Intrinsic::gcroot:
-    if (GFI) {
-      Value *Alloca = I.getOperand(1);
-      Constant *TypeMap = cast<Constant>(I.getOperand(2));
-
-      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
-      GFI->addStackRoot(FI->getIndex(), TypeMap);
-    }
-    return 0;
-
-  case Intrinsic::gcread:
-  case Intrinsic::gcwrite:
-    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
-    return 0;
-
-  case Intrinsic::flt_rounds: {
-    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
-    return 0;
-  }
-
-  case Intrinsic::trap: {
-    DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
-    return 0;
-  }
-
-  case Intrinsic::uadd_with_overflow:
-    return implVisitAluOverflow(I, ISD::UADDO);
-  case Intrinsic::sadd_with_overflow:
-    return implVisitAluOverflow(I, ISD::SADDO);
-  case Intrinsic::usub_with_overflow:
-    return implVisitAluOverflow(I, ISD::USUBO);
-  case Intrinsic::ssub_with_overflow:
-    return implVisitAluOverflow(I, ISD::SSUBO);
-  case Intrinsic::umul_with_overflow:
-    return implVisitAluOverflow(I, ISD::UMULO);
-  case Intrinsic::smul_with_overflow:
-    return implVisitAluOverflow(I, ISD::SMULO);
-
-  case Intrinsic::prefetch: {
-    SDValue Ops[4];
-    Ops[0] = getRoot();
-    Ops[1] = getValue(I.getOperand(1));
-    Ops[2] = getValue(I.getOperand(2));
-    Ops[3] = getValue(I.getOperand(3));
-    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
-    return 0;
-  }
-
-  case Intrinsic::memory_barrier: {
-    SDValue Ops[6];
-    Ops[0] = getRoot();
-    for (int x = 1; x < 6; ++x)
-      Ops[x] = getValue(I.getOperand(x));
-
-    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
-    return 0;
-  }
-  case Intrinsic::atomic_cmp_swap: {
-    SDValue Root = getRoot();
-    SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
-                    getValue(I.getOperand(2)).getValueType().getSimpleVT(),
-                    Root,
-                    getValue(I.getOperand(1)),
-                    getValue(I.getOperand(2)),
-                    getValue(I.getOperand(3)),
-                    I.getOperand(1));
-    setValue(&I, L);
-    DAG.setRoot(L.getValue(1));
-    return 0;
-  }
-  case Intrinsic::atomic_load_add:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
-  case Intrinsic::atomic_load_sub:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
-  case Intrinsic::atomic_load_or:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
-  case Intrinsic::atomic_load_xor:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
-  case Intrinsic::atomic_load_and:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
-  case Intrinsic::atomic_load_nand:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
-  case Intrinsic::atomic_load_max:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
-  case Intrinsic::atomic_load_min:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
-  case Intrinsic::atomic_load_umin:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
-  case Intrinsic::atomic_load_umax:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
-  case Intrinsic::atomic_swap:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
-
-  case Intrinsic::invariant_start:
-  case Intrinsic::lifetime_start:
-    // Discard region information.
-    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
-    return 0;
-  case Intrinsic::invariant_end:
-  case Intrinsic::lifetime_end:
-    // Discard region information.
-    return 0;
-  }
-}
-
-/// Test if the given instruction is in a position to be optimized
-/// with a tail-call. This roughly means that it's in a block with
-/// a return and there's nothing that needs to be scheduled
-/// between it and the return.
-///
-/// This function only tests target-independent requirements.
-/// For target-dependent requirements, a target should override
-/// TargetLowering::IsEligibleForTailCallOptimization.
-///
-static bool
-isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
-                     const TargetLowering &TLI) {
-  const BasicBlock *ExitBB = I->getParent();
-  const TerminatorInst *Term = ExitBB->getTerminator();
-  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
-  const Function *F = ExitBB->getParent();
-
-  // The block must end in a return statement or an unreachable.
-  if (!Ret && !isa<UnreachableInst>(Term)) return false;
-
-  // If I will have a chain, make sure no other instruction that will have a
-  // chain interposes between I and the return.
-  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
-      !I->isSafeToSpeculativelyExecute())
-    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
-         --BBI) {
-      if (&*BBI == I)
-        break;
-      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
-          !BBI->isSafeToSpeculativelyExecute())
-        return false;
-    }
-
-  // If the block ends with a void return or unreachable, it doesn't matter
-  // what the call's return type is.
-  if (!Ret || Ret->getNumOperands() == 0) return true;
-
-  // If the return value is undef, it doesn't matter what the call's
-  // return type is.
-  if (isa<UndefValue>(Ret->getOperand(0))) return true;
-
-  // Conservatively require the attributes of the call to match those of
-  // the return. Ignore noalias because it doesn't affect the call sequence.
-  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
-  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
-    return false;
-
-  // Otherwise, make sure the unmodified return value of I is the return value.
-  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
-       U = dyn_cast<Instruction>(U->getOperand(0))) {
-    if (!U)
-      return false;
-    if (!U->hasOneUse())
-      return false;
-    if (U == I)
-      break;
-    // Check for a truly no-op truncate.
-    if (isa<TruncInst>(U) &&
-        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
-      continue;
-    // Check for a truly no-op bitcast.
-    if (isa<BitCastInst>(U) &&
-        (U->getOperand(0)->getType() == U->getType() ||
-         (isa<PointerType>(U->getOperand(0)->getType()) &&
-          isa<PointerType>(U->getType()))))
-      continue;
-    // Otherwise it's not a true no-op.
-    return false;
-  }
-
-  return true;
-}
-
-void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
-                                       bool isTailCall,
-                                       MachineBasicBlock *LandingPad) {
-  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  const Type *RetTy = FTy->getReturnType();
-  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-  unsigned BeginLabel = 0, EndLabel = 0;
-
-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
-  Args.reserve(CS.arg_size());
-
-  // Check whether the function can return without sret-demotion.
-  SmallVector<EVT, 4> OutVTs;
-  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
-  SmallVector<uint64_t, 4> Offsets;
-  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), 
-    OutVTs, OutsFlags, TLI, &Offsets);
-  
-
-  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), 
-                        FTy->isVarArg(), OutVTs, OutsFlags, DAG);
-
-  SDValue DemoteStackSlot;
-
-  if (!CanLowerReturn) {
-    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
-                      FTy->getReturnType());
-    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
-                      FTy->getReturnType());
-    MachineFunction &MF = DAG.getMachineFunction();
-    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
-    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
-
-    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
-    Entry.Node = DemoteStackSlot;
-    Entry.Ty = StackSlotPtrType;
-    Entry.isSExt = false;
-    Entry.isZExt = false;
-    Entry.isInReg = false;
-    Entry.isSRet = true;
-    Entry.isNest = false;
-    Entry.isByVal = false;
-    Entry.Alignment = Align;
-    Args.push_back(Entry);
-    RetTy = Type::getVoidTy(FTy->getContext());
-  }
-
-  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
-       i != e; ++i) {
-    SDValue ArgNode = getValue(*i);
-    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
-
-    unsigned attrInd = i - CS.arg_begin() + 1;
-    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
-    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
-    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
-    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
-    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
-    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
-    Entry.Alignment = CS.getParamAlignment(attrInd);
-    Args.push_back(Entry);
-  }
-
-  if (LandingPad && MMI) {
-    // Insert a label before the invoke call to mark the try range.  This can be
-    // used to detect deletion of the invoke via the MachineModuleInfo.
-    BeginLabel = MMI->NextLabelID();
-
-    // Both PendingLoads and PendingExports must be flushed here;
-    // this call might not return.
-    (void)getRoot();
-    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
-                             getControlRoot(), BeginLabel));
-  }
-
-  // Check if target-independent constraints permit a tail call here.
-  // Target-dependent constraints are checked within TLI.LowerCallTo.
-  if (isTailCall &&
-      !isInTailCallPosition(CS.getInstruction(),
-                            CS.getAttributes().getRetAttributes(),
-                            TLI))
-    isTailCall = false;
-
-  std::pair<SDValue,SDValue> Result =
-    TLI.LowerCallTo(getRoot(), RetTy,
-                    CS.paramHasAttr(0, Attribute::SExt),
-                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
-                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
-                    CS.getCallingConv(),
-                    isTailCall,
-                    !CS.getInstruction()->use_empty(),
-                    Callee, Args, DAG, getCurDebugLoc());
-  assert((isTailCall || Result.second.getNode()) &&
-         "Non-null chain expected with non-tail call!");
-  assert((Result.second.getNode() || !Result.first.getNode()) &&
-         "Null value expected with tail call!");
-  if (Result.first.getNode())
-    setValue(CS.getInstruction(), Result.first);
-  else if (!CanLowerReturn && Result.second.getNode()) {
-    // The instruction result is the result of loading from the
-    // hidden sret parameter.
-    SmallVector<EVT, 1> PVTs;
-    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
-
-    ComputeValueVTs(TLI, PtrRetTy, PVTs);
-    assert(PVTs.size() == 1 && "Pointers should fit in one register");
-    EVT PtrVT = PVTs[0];
-    unsigned NumValues = OutVTs.size();
-    SmallVector<SDValue, 4> Values(NumValues);
-    SmallVector<SDValue, 4> Chains(NumValues);
-
-    for (unsigned i = 0; i < NumValues; ++i) {
-      SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
-        DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot,
-        DAG.getConstant(Offsets[i], PtrVT)),
-        NULL, Offsets[i], false, 1);
-      Values[i] = L;
-      Chains[i] = L.getValue(1);
-    }
-    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
-                                MVT::Other, &Chains[0], NumValues);
-    PendingLoads.push_back(Chain);
-
-    setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES,
-             getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues),
-             &Values[0], NumValues));
-  }
-  // As a special case, a null chain means that a tail call has
-  // been emitted and the DAG root is already updated.
-  if (Result.second.getNode())
-    DAG.setRoot(Result.second);
-  else
-    HasTailCall = true;
-
-  if (LandingPad && MMI) {
-    // Insert a label at the end of the invoke call to mark the try range.  This
-    // can be used to detect deletion of the invoke via the MachineModuleInfo.
-    EndLabel = MMI->NextLabelID();
-    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
-                             getRoot(), EndLabel));
-
-    // Inform MachineModuleInfo of range.
-    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
-  }
-}
-
-
-void SelectionDAGLowering::visitCall(CallInst &I) {
-  const char *RenameFn = 0;
-  if (Function *F = I.getCalledFunction()) {
-    if (F->isDeclaration()) {
-      const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
-      if (II) {
-        if (unsigned IID = II->getIntrinsicID(F)) {
-          RenameFn = visitIntrinsicCall(I, IID);
-          if (!RenameFn)
-            return;
-        }
-      }
-      if (unsigned IID = F->getIntrinsicID()) {
-        RenameFn = visitIntrinsicCall(I, IID);
-        if (!RenameFn)
-          return;
-      }
-    }
-
-    // Check for well-known libc/libm calls.  If the function is internal, it
-    // can't be a library call.
-    if (!F->hasLocalLinkage() && F->hasName()) {
-      StringRef Name = F->getName();
-      if (Name == "copysign" || Name == "copysignf") {
-        if (I.getNumOperands() == 3 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
-            I.getType() == I.getOperand(1)->getType() &&
-            I.getType() == I.getOperand(2)->getType()) {
-          SDValue LHS = getValue(I.getOperand(1));
-          SDValue RHS = getValue(I.getOperand(2));
-          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
-                                   LHS.getValueType(), LHS, RHS));
-          return;
-        }
-      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
-        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
-            I.getType() == I.getOperand(1)->getType()) {
-          SDValue Tmp = getValue(I.getOperand(1));
-          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
-          return;
-        }
-      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
-        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
-            I.getType() == I.getOperand(1)->getType() &&
-            I.onlyReadsMemory()) {
-          SDValue Tmp = getValue(I.getOperand(1));
-          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
-          return;
-        }
-      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
-        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
-            I.getType() == I.getOperand(1)->getType() &&
-            I.onlyReadsMemory()) {
-          SDValue Tmp = getValue(I.getOperand(1));
-          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
-          return;
-        }
-      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
-        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
-            I.getOperand(1)->getType()->isFloatingPoint() &&
-            I.getType() == I.getOperand(1)->getType() &&
-            I.onlyReadsMemory()) {
-          SDValue Tmp = getValue(I.getOperand(1));
-          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
-          return;
-        }
-      }
-    }
-  } else if (isa<InlineAsm>(I.getOperand(0))) {
-    visitInlineAsm(&I);
-    return;
-  }
-
-  SDValue Callee;
-  if (!RenameFn)
-    Callee = getValue(I.getOperand(0));
-  else
-    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
-
-  // Check if we can potentially perform a tail call. More detailed
-  // checking is be done within LowerCallTo, after more information
-  // about the call is known.
-  bool isTailCall = PerformTailCallOpt && I.isTailCall();
-
-  LowerCallTo(&I, Callee, isTailCall);
-}
-
-
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value.  This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
-                                      SDValue &Chain,
-                                      SDValue *Flag) const {
-  // Assemble the legal parts into the final values.
-  SmallVector<SDValue, 4> Values(ValueVTs.size());
-  SmallVector<SDValue, 8> Parts;
-  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    // Copy the legal parts from the registers.
-    EVT ValueVT = ValueVTs[Value];
-    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
-    EVT RegisterVT = RegVTs[Value];
-
-    Parts.resize(NumRegs);
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      SDValue P;
-      if (Flag == 0)
-        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
-      else {
-        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
-        *Flag = P.getValue(2);
-      }
-      Chain = P.getValue(1);
-
-      // If the source register was virtual and if we know something about it,
-      // add an assert node.
-      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
-          RegisterVT.isInteger() && !RegisterVT.isVector()) {
-        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
-        FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
-        if (FLI.LiveOutRegInfo.size() > SlotNo) {
-          FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
-
-          unsigned RegSize = RegisterVT.getSizeInBits();
-          unsigned NumSignBits = LOI.NumSignBits;
-          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
-          // FIXME: We capture more information than the dag can represent.  For
-          // now, just use the tightest assertzext/assertsext possible.
-          bool isSExt = true;
-          EVT FromVT(MVT::Other);
-          if (NumSignBits == RegSize)
-            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
-          else if (NumZeroBits >= RegSize-1)
-            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
-          else if (NumSignBits > RegSize-8)
-            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
-          else if (NumZeroBits >= RegSize-8)
-            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
-          else if (NumSignBits > RegSize-16)
-            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
-          else if (NumZeroBits >= RegSize-16)
-            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
-          else if (NumSignBits > RegSize-32)
-            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
-          else if (NumZeroBits >= RegSize-32)
-            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
-          if (FromVT != MVT::Other) {
-            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
-                            RegisterVT, P, DAG.getValueType(FromVT));
-
-          }
-        }
-      }
-
-      Parts[i] = P;
-    }
-
-    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
-                                     NumRegs, RegisterVT, ValueVT);
-    Part += NumRegs;
-    Parts.clear();
-  }
-
-  return DAG.getNode(ISD::MERGE_VALUES, dl,
-                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
-                     &Values[0], ValueVTs.size());
-}
-
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object.  This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
-void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                                 SDValue &Chain, SDValue *Flag) const {
-  // Get the list of the values's legal parts.
-  unsigned NumRegs = Regs.size();
-  SmallVector<SDValue, 8> Parts(NumRegs);
-  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
-    unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
-    EVT RegisterVT = RegVTs[Value];
-
-    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
-                   &Parts[Part], NumParts, RegisterVT);
-    Part += NumParts;
-  }
-
-  // Copy the parts into the registers.
-  SmallVector<SDValue, 8> Chains(NumRegs);
-  for (unsigned i = 0; i != NumRegs; ++i) {
-    SDValue Part;
-    if (Flag == 0)
-      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
-    else {
-      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
-      *Flag = Part.getValue(1);
-    }
-    Chains[i] = Part.getValue(0);
-  }
-
-  if (NumRegs == 1 || Flag)
-    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
-    // flagged to it. That is the CopyToReg nodes and the user are considered
-    // a single scheduling unit. If we create a TokenFactor and return it as
-    // chain, then the TokenFactor is both a predecessor (operand) of the
-    // user as well as a successor (the TF operands are flagged to the user).
-    // c1, f1 = CopyToReg
-    // c2, f2 = CopyToReg
-    // c3     = TokenFactor c1, c2
-    // ...
-    //        = op c3, ..., f2
-    Chain = Chains[NumRegs-1];
-  else
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
-}
-
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list.  This adds the code marker and includes the number of
-/// values added into it.
-void RegsForValue::AddInlineAsmOperands(unsigned Code,
-                                        bool HasMatching,unsigned MatchingIdx,
-                                        SelectionDAG &DAG,
-                                        std::vector<SDValue> &Ops) const {
-  EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
-  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
-  unsigned Flag = Code | (Regs.size() << 3);
-  if (HasMatching)
-    Flag |= 0x80000000 | (MatchingIdx << 16);
-  Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
-  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
-    EVT RegisterVT = RegVTs[Value];
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      assert(Reg < Regs.size() && "Mismatch in # registers expected");
-      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
-    }
-  }
-}
-
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register.  Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
-                      const TargetLowering &TLI,
-                      const TargetRegisterInfo *TRI) {
-  EVT FoundVT = MVT::Other;
-  const TargetRegisterClass *FoundRC = 0;
-  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
-       E = TRI->regclass_end(); RCI != E; ++RCI) {
-    EVT ThisVT = MVT::Other;
-
-    const TargetRegisterClass *RC = *RCI;
-    // If none of the the value types for this register class are valid, we
-    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
-    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
-         I != E; ++I) {
-      if (TLI.isTypeLegal(*I)) {
-        // If we have already found this register in a different register class,
-        // choose the one with the largest VT specified.  For example, on
-        // PowerPC, we favor f64 register classes over f32.
-        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
-          ThisVT = *I;
-          break;
-        }
-      }
-    }
-
-    if (ThisVT == MVT::Other) continue;
-
-    // NOTE: This isn't ideal.  In particular, this might allocate the
-    // frame pointer in functions that need it (due to them not being taken
-    // out of allocation, because a variable sized allocation hasn't been seen
-    // yet).  This is a slight code pessimization, but should still work.
-    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-      if (*I == Reg) {
-        // We found a matching register class.  Keep looking at others in case
-        // we find one with larger registers that this physreg is also in.
-        FoundRC = RC;
-        FoundVT = ThisVT;
-        break;
-      }
-  }
-  return FoundRC;
-}
-
-
-namespace llvm {
-/// AsmOperandInfo - This contains information for each constraint that we are
-/// lowering.
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
-    public TargetLowering::AsmOperandInfo {
-public:
-  /// CallOperand - If this is the result output operand or a clobber
-  /// this is null, otherwise it is the incoming operand to the CallInst.
-  /// This gets modified as the asm is processed.
-  SDValue CallOperand;
-
-  /// AssignedRegs - If this is a register or register class operand, this
-  /// contains the set of register corresponding to the operand.
-  RegsForValue AssignedRegs;
-
-  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
-    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
-  }
-
-  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
-  /// busy in OutputRegs/InputRegs.
-  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
-                         std::set<unsigned> &OutputRegs,
-                         std::set<unsigned> &InputRegs,
-                         const TargetRegisterInfo &TRI) const {
-    if (isOutReg) {
-      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
-        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
-    }
-    if (isInReg) {
-      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
-        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
-    }
-  }
-
-  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
-  /// corresponds to.  If there is no Value* for this operand, it returns
-  /// MVT::Other.
-  EVT getCallOperandValEVT(LLVMContext &Context, 
-                           const TargetLowering &TLI,
-                           const TargetData *TD) const {
-    if (CallOperandVal == 0) return MVT::Other;
-
-    if (isa<BasicBlock>(CallOperandVal))
-      return TLI.getPointerTy();
-
-    const llvm::Type *OpTy = CallOperandVal->getType();
-
-    // If this is an indirect operand, the operand is a pointer to the
-    // accessed type.
-    if (isIndirect)
-      OpTy = cast<PointerType>(OpTy)->getElementType();
-
-    // If OpTy is not a single value, it may be a struct/union that we
-    // can tile with integers.
-    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
-      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
-      switch (BitSize) {
-      default: break;
-      case 1:
-      case 8:
-      case 16:
-      case 32:
-      case 64:
-      case 128:
-        OpTy = IntegerType::get(Context, BitSize);
-        break;
-      }
-    }
-
-    return TLI.getValueType(OpTy, true);
-  }
-
-private:
-  /// MarkRegAndAliases - Mark the specified register and all aliases in the
-  /// specified set.
-  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
-                                const TargetRegisterInfo &TRI) {
-    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
-    Regs.insert(Reg);
-    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
-      for (; *Aliases; ++Aliases)
-        Regs.insert(*Aliases);
-  }
-};
-} // end llvm namespace.
-
-
-/// GetRegistersForValue - Assign registers (virtual or physical) for the
-/// specified operand.  We prefer to assign virtual registers, to allow the
-/// register allocator handle the assignment process.  However, if the asm uses
-/// features that we can't model on machineinstrs, we have SDISel do the
-/// allocation.  This produces generally horrible, but correct, code.
-///
-///   OpInfo describes the operand.
-///   Input and OutputRegs are the set of already allocated physical registers.
-///
-void SelectionDAGLowering::
-GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
-                     std::set<unsigned> &OutputRegs,
-                     std::set<unsigned> &InputRegs) {
-  LLVMContext &Context = FuncInfo.Fn->getContext();
-
-  // Compute whether this value requires an input register, an output register,
-  // or both.
-  bool isOutReg = false;
-  bool isInReg = false;
-  switch (OpInfo.Type) {
-  case InlineAsm::isOutput:
-    isOutReg = true;
-
-    // If there is an input constraint that matches this, we need to reserve
-    // the input register so no other inputs allocate to it.
-    isInReg = OpInfo.hasMatchingInput();
-    break;
-  case InlineAsm::isInput:
-    isInReg = true;
-    isOutReg = false;
-    break;
-  case InlineAsm::isClobber:
-    isOutReg = true;
-    isInReg = true;
-    break;
-  }
-
-
-  MachineFunction &MF = DAG.getMachineFunction();
-  SmallVector<unsigned, 4> Regs;
-
-  // If this is a constraint for a single physreg, or a constraint for a
-  // register class, find it.
-  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
-    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
-                                     OpInfo.ConstraintVT);
-
-  unsigned NumRegs = 1;
-  if (OpInfo.ConstraintVT != MVT::Other) {
-    // If this is a FP input in an integer register (or visa versa) insert a bit
-    // cast of the input value.  More generally, handle any case where the input
-    // value disagrees with the register class we plan to stick this in.
-    if (OpInfo.Type == InlineAsm::isInput &&
-        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
-      // Try to convert to the first EVT that the reg class contains.  If the
-      // types are identical size, use a bitcast to convert (e.g. two differing
-      // vector types).
-      EVT RegVT = *PhysReg.second->vt_begin();
-      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
-        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
-                                         RegVT, OpInfo.CallOperand);
-        OpInfo.ConstraintVT = RegVT;
-      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
-        // If the input is a FP value and we want it in FP registers, do a
-        // bitcast to the corresponding integer type.  This turns an f64 value
-        // into i64, which can be passed with two i32 values on a 32-bit
-        // machine.
-        RegVT = EVT::getIntegerVT(Context, 
-                                  OpInfo.ConstraintVT.getSizeInBits());
-        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
-                                         RegVT, OpInfo.CallOperand);
-        OpInfo.ConstraintVT = RegVT;
-      }
-    }
-
-    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
-  }
-
-  EVT RegVT;
-  EVT ValueVT = OpInfo.ConstraintVT;
-
-  // If this is a constraint for a specific physical register, like {r17},
-  // assign it now.
-  if (unsigned AssignedReg = PhysReg.first) {
-    const TargetRegisterClass *RC = PhysReg.second;
-    if (OpInfo.ConstraintVT == MVT::Other)
-      ValueVT = *RC->vt_begin();
-
-    // Get the actual register value type.  This is important, because the user
-    // may have asked for (e.g.) the AX register in i32 type.  We need to
-    // remember that AX is actually i16 to get the right extension.
-    RegVT = *RC->vt_begin();
-
-    // This is a explicit reference to a physical register.
-    Regs.push_back(AssignedReg);
-
-    // If this is an expanded reference, add the rest of the regs to Regs.
-    if (NumRegs != 1) {
-      TargetRegisterClass::iterator I = RC->begin();
-      for (; *I != AssignedReg; ++I)
-        assert(I != RC->end() && "Didn't find reg!");
-
-      // Already added the first reg.
-      --NumRegs; ++I;
-      for (; NumRegs; --NumRegs, ++I) {
-        assert(I != RC->end() && "Ran out of registers to allocate!");
-        Regs.push_back(*I);
-      }
-    }
-    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
-    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
-    return;
-  }
-
-  // Otherwise, if this was a reference to an LLVM register class, create vregs
-  // for this reference.
-  if (const TargetRegisterClass *RC = PhysReg.second) {
-    RegVT = *RC->vt_begin();
-    if (OpInfo.ConstraintVT == MVT::Other)
-      ValueVT = RegVT;
-
-    // Create the appropriate number of virtual registers.
-    MachineRegisterInfo &RegInfo = MF.getRegInfo();
-    for (; NumRegs; --NumRegs)
-      Regs.push_back(RegInfo.createVirtualRegister(RC));
-
-    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
-    return;
-  }
-  
-  // This is a reference to a register class that doesn't directly correspond
-  // to an LLVM register class.  Allocate NumRegs consecutive, available,
-  // registers from the class.
-  std::vector<unsigned> RegClassRegs
-    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
-                                            OpInfo.ConstraintVT);
-
-  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-  unsigned NumAllocated = 0;
-  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
-    unsigned Reg = RegClassRegs[i];
-    // See if this register is available.
-    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
-        (isInReg  && InputRegs.count(Reg))) {    // Already used.
-      // Make sure we find consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Check to see if this register is allocatable (i.e. don't give out the
-    // stack pointer).
-    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
-    if (!RC) {        // Couldn't allocate this register.
-      // Reset NumAllocated to make sure we return consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Okay, this register is good, we can use it.
-    ++NumAllocated;
-
-    // If we allocated enough consecutive registers, succeed.
-    if (NumAllocated == NumRegs) {
-      unsigned RegStart = (i-NumAllocated)+1;
-      unsigned RegEnd   = i+1;
-      // Mark all of the allocated registers used.
-      for (unsigned i = RegStart; i != RegEnd; ++i)
-        Regs.push_back(RegClassRegs[i]);
-
-      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
-                                         OpInfo.ConstraintVT);
-      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
-      return;
-    }
-  }
-
-  // Otherwise, we couldn't allocate enough registers for this.
-}
-
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-static bool
-hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
-                          const TargetLowering &TLI) {
-  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
-    InlineAsm::ConstraintInfo &CI = CInfos[i];
-    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
-      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
-      if (CType == TargetLowering::C_Memory)
-        return true;
-    }
-    
-    // Indirect operand accesses access memory.
-    if (CI.isIndirect)
-      return true;
-  }
-
-  return false;
-}
-
-/// visitInlineAsm - Handle a call to an InlineAsm object.
-///
-void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
-  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
-
-  /// ConstraintOperands - Information about all of the constraints.
-  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
-
-  std::set<unsigned> OutputRegs, InputRegs;
-
-  // Do a prepass over the constraints, canonicalizing them, and building up the
-  // ConstraintOperands list.
-  std::vector<InlineAsm::ConstraintInfo>
-    ConstraintInfos = IA->ParseConstraints();
-
-  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-  
-  SDValue Chain, Flag;
-  
-  // We won't need to flush pending loads if this asm doesn't touch
-  // memory and is nonvolatile.
-  if (hasMemory || IA->hasSideEffects())
-    Chain = getRoot();
-  else
-    Chain = DAG.getRoot();
-
-  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
-  unsigned ResNo = 0;   // ResNo - The result number of the next output.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
-    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
-    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
-
-    EVT OpVT = MVT::Other;
-
-    // Compute the value type for each operand.
-    switch (OpInfo.Type) {
-    case InlineAsm::isOutput:
-      // Indirect outputs just consume an argument.
-      if (OpInfo.isIndirect) {
-        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
-        break;
-      }
-
-      // The return value of the call is this value.  As such, there is no
-      // corresponding argument.
-      assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
-             "Bad inline asm!");
-      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
-        OpVT = TLI.getValueType(STy->getElementType(ResNo));
-      } else {
-        assert(ResNo == 0 && "Asm only has one result!");
-        OpVT = TLI.getValueType(CS.getType());
-      }
-      ++ResNo;
-      break;
-    case InlineAsm::isInput:
-      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
-      break;
-    case InlineAsm::isClobber:
-      // Nothing to do.
-      break;
-    }
-
-    // If this is an input or an indirect output, process the call argument.
-    // BasicBlocks are labels, currently appearing only in asm's.
-    if (OpInfo.CallOperandVal) {
-      // Strip bitcasts, if any.  This mostly comes up for functions.
-      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
-
-      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
-        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
-      } else {
-        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
-      }
-
-      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
-    }
-
-    OpInfo.ConstraintVT = OpVT;
-  }
-
-  // Second pass over the constraints: compute which constraint option to use
-  // and assign registers to constraints that want a specific physreg.
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
-    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
-    // If this is an output operand with a matching input operand, look up the
-    // matching input. If their types mismatch, e.g. one is an integer, the
-    // other is floating point, or their sizes are different, flag it as an
-    // error.
-    if (OpInfo.hasMatchingInput()) {
-      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
-        if ((OpInfo.ConstraintVT.isInteger() !=
-             Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
-          llvm_report_error("Unsupported asm: input constraint"
-                            " with a matching output constraint of incompatible"
-                            " type!");
-        }
-        Input.ConstraintVT = OpInfo.ConstraintVT;
-      }
-    }
-
-    // Compute the constraint code and ConstraintType to use.
-    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
-
-    // If this is a memory input, and if the operand is not indirect, do what we
-    // need to to provide an address for the memory input.
-    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
-        !OpInfo.isIndirect) {
-      assert(OpInfo.Type == InlineAsm::isInput &&
-             "Can only indirectify direct input operands!");
-
-      // Memory operands really want the address of the value.  If we don't have
-      // an indirect input, put it in the constpool if we can, otherwise spill
-      // it to a stack slot.
-
-      // If the operand is a float, integer, or vector constant, spill to a
-      // constant pool entry to get its address.
-      Value *OpVal = OpInfo.CallOperandVal;
-      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
-          isa<ConstantVector>(OpVal)) {
-        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
-                                                 TLI.getPointerTy());
-      } else {
-        // Otherwise, create a stack slot and emit a store to it before the
-        // asm.
-        const Type *Ty = OpVal->getType();
-        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
-        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
-        MachineFunction &MF = DAG.getMachineFunction();
-        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
-        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
-        Chain = DAG.getStore(Chain, getCurDebugLoc(),
-                             OpInfo.CallOperand, StackSlot, NULL, 0);
-        OpInfo.CallOperand = StackSlot;
-      }
-
-      // There is no longer a Value* corresponding to this operand.
-      OpInfo.CallOperandVal = 0;
-      // It is now an indirect operand.
-      OpInfo.isIndirect = true;
-    }
-
-    // If this constraint is for a specific register, allocate it before
-    // anything else.
-    if (OpInfo.ConstraintType == TargetLowering::C_Register)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
-  }
-  ConstraintInfos.clear();
-
-
-  // Second pass - Loop over all of the operands, assigning virtual or physregs
-  // to register class operands.
-  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
-    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
-    // C_Register operands have already been allocated, Other/Memory don't need
-    // to be.
-    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
-  }
-
-  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
-  std::vector<SDValue> AsmNodeOperands;
-  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
-  AsmNodeOperands.push_back(
-          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
-
-
-  // Loop over all of the inputs, copying the operand values into the
-  // appropriate registers and processing the output regs.
-  RegsForValue RetValRegs;
-
-  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
-  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
-
-  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
-    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
-
-    switch (OpInfo.Type) {
-    case InlineAsm::isOutput: {
-      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
-          OpInfo.ConstraintType != TargetLowering::C_Register) {
-        // Memory output, or 'other' output (e.g. 'X' constraint).
-        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
-
-        // Add information to the INLINEASM node to know about this output.
-        unsigned ResOpType = 4/*MEM*/ | (1<<3);
-        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
-                                                        TLI.getPointerTy()));
-        AsmNodeOperands.push_back(OpInfo.CallOperand);
-        break;
-      }
-
-      // Otherwise, this is a register or register class output.
-
-      // Copy the output from the appropriate register.  Find a register that
-      // we can use.
-      if (OpInfo.AssignedRegs.Regs.empty()) {
-        llvm_report_error("Couldn't allocate output reg for"
-                          " constraint '" + OpInfo.ConstraintCode + "'!");
-      }
-
-      // If this is an indirect operand, store through the pointer after the
-      // asm.
-      if (OpInfo.isIndirect) {
-        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
-                                                      OpInfo.CallOperandVal));
-      } else {
-        // This is the result value of the call.
-        assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
-               "Bad inline asm!");
-        // Concatenate this output onto the outputs list.
-        RetValRegs.append(OpInfo.AssignedRegs);
-      }
-
-      // Add information to the INLINEASM node to know that this register is
-      // set.
-      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
-                                               6 /* EARLYCLOBBER REGDEF */ :
-                                               2 /* REGDEF */ ,
-                                               false,
-                                               0,
-                                               DAG, AsmNodeOperands);
-      break;
-    }
-    case InlineAsm::isInput: {
-      SDValue InOperandVal = OpInfo.CallOperand;
-
-      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
-        // If this is required to match an output register we have already set,
-        // just use its register.
-        unsigned OperandNo = OpInfo.getMatchedOperand();
-
-        // Scan until we find the definition we already emitted of this operand.
-        // When we find it, create a RegsForValue operand.
-        unsigned CurOp = 2;  // The first operand.
-        for (; OperandNo; --OperandNo) {
-          // Advance to the next operand.
-          unsigned OpFlag =
-            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
-          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
-                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
-                  (OpFlag & 7) == 4 /*MEM*/) &&
-                 "Skipped past definitions?");
-          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
-        }
-
-        unsigned OpFlag =
-          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
-        if ((OpFlag & 7) == 2 /*REGDEF*/
-            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
-          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
-          if (OpInfo.isIndirect) {
-            llvm_report_error("Don't know how to handle tied indirect "
-                              "register inputs yet!");
-          }
-          RegsForValue MatchedRegs;
-          MatchedRegs.TLI = &TLI;
-          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
-          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
-          MatchedRegs.RegVTs.push_back(RegVT);
-          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
-          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
-               i != e; ++i)
-            MatchedRegs.Regs.
-              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
-
-          // Use the produced MatchedRegs object to
-          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                    Chain, &Flag);
-          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
-                                           true, OpInfo.getMatchedOperand(),
-                                           DAG, AsmNodeOperands);
-          break;
-        } else {
-          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
-          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
-                 "Unexpected number of operands");
-          // Add information to the INLINEASM node to know about this input.
-          // See InlineAsm.h isUseOperandTiedToDef.
-          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
-          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
-                                                          TLI.getPointerTy()));
-          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
-          break;
-        }
-      }
-
-      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
-        assert(!OpInfo.isIndirect &&
-               "Don't know how to handle indirect other inputs yet!");
-
-        std::vector<SDValue> Ops;
-        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
-                                         hasMemory, Ops, DAG);
-        if (Ops.empty()) {
-          llvm_report_error("Invalid operand for inline asm"
-                            " constraint '" + OpInfo.ConstraintCode + "'!");
-        }
-
-        // Add information to the INLINEASM node to know about this input.
-        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
-        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
-                                                        TLI.getPointerTy()));
-        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
-        break;
-      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
-        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
-        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
-               "Memory operands expect pointer values");
-
-        // Add information to the INLINEASM node to know about this input.
-        unsigned ResOpType = 4/*MEM*/ | (1<<3);
-        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
-                                                        TLI.getPointerTy()));
-        AsmNodeOperands.push_back(InOperandVal);
-        break;
-      }
-
-      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
-              OpInfo.ConstraintType == TargetLowering::C_Register) &&
-             "Unknown constraint type!");
-      assert(!OpInfo.isIndirect &&
-             "Don't know how to handle indirect register inputs yet!");
-
-      // Copy the input into the appropriate registers.
-      if (OpInfo.AssignedRegs.Regs.empty()) {
-        llvm_report_error("Couldn't allocate input reg for"
-                          " constraint '"+ OpInfo.ConstraintCode +"'!");
-      }
-
-      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                        Chain, &Flag);
-
-      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
-                                               DAG, AsmNodeOperands);
-      break;
-    }
-    case InlineAsm::isClobber: {
-      // Add the clobbered value to the operand list, so that the register
-      // allocator is aware that the physreg got clobbered.
-      if (!OpInfo.AssignedRegs.Regs.empty())
-        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
-                                                 false, 0, DAG,AsmNodeOperands);
-      break;
-    }
-    }
-  }
-
-  // Finish up input operands.
-  AsmNodeOperands[0] = Chain;
-  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
-
-  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
-                      DAG.getVTList(MVT::Other, MVT::Flag),
-                      &AsmNodeOperands[0], AsmNodeOperands.size());
-  Flag = Chain.getValue(1);
-
-  // If this asm returns a register value, copy the result from that register
-  // and set it as the value of the call.
-  if (!RetValRegs.Regs.empty()) {
-    SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
-                                             Chain, &Flag);
-
-    // FIXME: Why don't we do this for inline asms with MRVs?
-    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
-      EVT ResultType = TLI.getValueType(CS.getType());
-
-      // If any of the results of the inline asm is a vector, it may have the
-      // wrong width/num elts.  This can happen for register classes that can
-      // contain multiple different value types.  The preg or vreg allocated may
-      // not have the same VT as was expected.  Convert it to the right type
-      // with bit_convert.
-      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
-        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
-                          ResultType, Val);
-
-      } else if (ResultType != Val.getValueType() &&
-                 ResultType.isInteger() && Val.getValueType().isInteger()) {
-        // If a result value was tied to an input value, the computed result may
-        // have a wider width than the expected result.  Extract the relevant
-        // portion.
-        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
-      }
-
-      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
-    }
-
-    setValue(CS.getInstruction(), Val);
-    // Don't need to use this as a chain in this case.
-    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
-      return;
-  }
-
-  std::vector<std::pair<SDValue, Value*> > StoresToEmit;
-
-  // Process indirect outputs, first output all of the flagged copies out of
-  // physregs.
-  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
-    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
-    Value *Ptr = IndirectStoresToEmit[i].second;
-    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
-                                             Chain, &Flag);
-    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
-
-  }
-
-  // Emit the non-flagged stores from the physregs.
-  SmallVector<SDValue, 8> OutChains;
-  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
-    OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
-                                    StoresToEmit[i].first,
-                                    getValue(StoresToEmit[i].second),
-                                    StoresToEmit[i].second, 0));
-  if (!OutChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
-                        &OutChains[0], OutChains.size());
-  DAG.setRoot(Chain);
-}
-
-void SelectionDAGLowering::visitVAStart(CallInst &I) {
-  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
-                          MVT::Other, getRoot(),
-                          getValue(I.getOperand(1)),
-                          DAG.getSrcValue(I.getOperand(1))));
-}
-
-void SelectionDAGLowering::visitVAArg(VAArgInst &I) {
-  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
-                           getRoot(), getValue(I.getOperand(0)),
-                           DAG.getSrcValue(I.getOperand(0)));
-  setValue(&I, V);
-  DAG.setRoot(V.getValue(1));
-}
-
-void SelectionDAGLowering::visitVAEnd(CallInst &I) {
-  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
-                          MVT::Other, getRoot(),
-                          getValue(I.getOperand(1)),
-                          DAG.getSrcValue(I.getOperand(1))));
-}
-
-void SelectionDAGLowering::visitVACopy(CallInst &I) {
-  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
-                          MVT::Other, getRoot(),
-                          getValue(I.getOperand(1)),
-                          getValue(I.getOperand(2)),
-                          DAG.getSrcValue(I.getOperand(1)),
-                          DAG.getSrcValue(I.getOperand(2))));
-}
-
-/// TargetLowering::LowerCallTo - This is the default LowerCallTo
-/// implementation, which just calls LowerCall.
-/// FIXME: When all targets are
-/// migrated to using LowerCall, this hook should be integrated into SDISel.
-std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
-                            bool RetSExt, bool RetZExt, bool isVarArg,
-                            bool isInreg, unsigned NumFixedArgs,
-                            CallingConv::ID CallConv, bool isTailCall,
-                            bool isReturnValueUsed,
-                            SDValue Callee,
-                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
-
-  assert((!isTailCall || PerformTailCallOpt) &&
-         "isTailCall set when tail-call optimizations are disabled!");
-
-  // Handle all of the outgoing arguments.
-  SmallVector<ISD::OutputArg, 32> Outs;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    SmallVector<EVT, 4> ValueVTs;
-    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
-    for (unsigned Value = 0, NumValues = ValueVTs.size();
-         Value != NumValues; ++Value) {
-      EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
-      SDValue Op = SDValue(Args[i].Node.getNode(),
-                           Args[i].Node.getResNo() + Value);
-      ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment =
-        getTargetData()->getABITypeAlignment(ArgTy);
-
-      if (Args[i].isZExt)
-        Flags.setZExt();
-      if (Args[i].isSExt)
-        Flags.setSExt();
-      if (Args[i].isInReg)
-        Flags.setInReg();
-      if (Args[i].isSRet)
-        Flags.setSRet();
-      if (Args[i].isByVal) {
-        Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
-        // For ByVal, alignment should come from FE.  BE will guess if this
-        // info is not there but there are cases it cannot get right.
-        if (Args[i].Alignment)
-          FrameAlign = Args[i].Alignment;
-        Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
-      }
-      if (Args[i].isNest)
-        Flags.setNest();
-      Flags.setOrigAlign(OriginalAlignment);
-
-      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
-      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
-      SmallVector<SDValue, 4> Parts(NumParts);
-      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
-      if (Args[i].isSExt)
-        ExtendKind = ISD::SIGN_EXTEND;
-      else if (Args[i].isZExt)
-        ExtendKind = ISD::ZERO_EXTEND;
-
-      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
-
-      for (unsigned j = 0; j != NumParts; ++j) {
-        // if it isn't first piece, alignment must be 1
-        ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
-        if (NumParts > 1 && j == 0)
-          MyFlags.Flags.setSplit();
-        else if (j != 0)
-          MyFlags.Flags.setOrigAlign(1);
-
-        Outs.push_back(MyFlags);
-      }
-    }
-  }
-
-  // Handle the incoming return values from the call.
-  SmallVector<ISD::InputArg, 32> Ins;
-  SmallVector<EVT, 4> RetTys;
-  ComputeValueVTs(*this, RetTy, RetTys);
-  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-    EVT VT = RetTys[I];
-    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
-      MyFlags.Used = isReturnValueUsed;
-      if (RetSExt)
-        MyFlags.Flags.setSExt();
-      if (RetZExt)
-        MyFlags.Flags.setZExt();
-      if (isInreg)
-        MyFlags.Flags.setInReg();
-      Ins.push_back(MyFlags);
-    }
-  }
-
-  // Check if target-dependent constraints permit a tail call here.
-  // Target-independent constraints should be checked by the caller.
-  if (isTailCall &&
-      !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG))
-    isTailCall = false;
-
-  SmallVector<SDValue, 4> InVals;
-  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
-                    Outs, Ins, dl, DAG, InVals);
-
-  // Verify that the target's LowerCall behaved as expected.
-  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
-         "LowerCall didn't return a valid chain!");
-  assert((!isTailCall || InVals.empty()) &&
-         "LowerCall emitted a return value for a tail call!");
-  assert((isTailCall || InVals.size() == Ins.size()) &&
-         "LowerCall didn't emit the correct number of values!");
-  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-          assert(InVals[i].getNode() &&
-                 "LowerCall emitted a null value!");
-          assert(Ins[i].VT == InVals[i].getValueType() &&
-                 "LowerCall emitted a value with the wrong type!");
-        });
-
-  // For a tail call, the return value is merely live-out and there aren't
-  // any nodes in the DAG representing it. Return a special value to
-  // indicate that a tail call has been emitted and no more Instructions
-  // should be processed in the current block.
-  if (isTailCall) {
-    DAG.setRoot(Chain);
-    return std::make_pair(SDValue(), SDValue());
-  }
-
-  // Collect the legal value parts into potentially illegal values
-  // that correspond to the original function's return values.
-  ISD::NodeType AssertOp = ISD::DELETED_NODE;
-  if (RetSExt)
-    AssertOp = ISD::AssertSext;
-  else if (RetZExt)
-    AssertOp = ISD::AssertZext;
-  SmallVector<SDValue, 4> ReturnValues;
-  unsigned CurReg = 0;
-  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-    EVT VT = RetTys[I];
-    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
-
-    SDValue ReturnValue =
-      getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT,
-                       AssertOp);
-    ReturnValues.push_back(ReturnValue);
-    CurReg += NumRegs;
-  }
-
-  // For a function returning void, there is no return value. We can't create
-  // such a node, so we just return a null return value in that case. In
-  // that case, nothing will actualy look at the value.
-  if (ReturnValues.empty())
-    return std::make_pair(SDValue(), Chain);
-
-  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
-                            DAG.getVTList(&RetTys[0], RetTys.size()),
-                            &ReturnValues[0], ReturnValues.size());
-
-  return std::make_pair(Res, Chain);
-}
-
-void TargetLowering::LowerOperationWrapper(SDNode *N,
-                                           SmallVectorImpl<SDValue> &Results,
-                                           SelectionDAG &DAG) {
-  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
-  if (Res.getNode())
-    Results.push_back(Res);
-}
-
-SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
-  llvm_unreachable("LowerOperation not implemented for this target!");
-  return SDValue();
-}
-
-
-void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
-  SDValue Op = getValue(V);
-  assert((Op.getOpcode() != ISD::CopyFromReg ||
-          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
-         "Copy from a reg to the same reg!");
-  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
-
-  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
-  SDValue Chain = DAG.getEntryNode();
-  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
-  PendingExports.push_back(Chain);
-}
-
-#include "llvm/CodeGen/SelectionDAGISel.h"
-
-void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
-  // If this is the entry block, emit arguments.
-  Function &F = *LLVMBB->getParent();
-  SelectionDAG &DAG = SDL->DAG;
-  SDValue OldRoot = DAG.getRoot();
-  DebugLoc dl = SDL->getCurDebugLoc();
-  const TargetData *TD = TLI.getTargetData();
-  SmallVector<ISD::InputArg, 16> Ins;
-
-  // Check whether the function can return without sret-demotion.
-  SmallVector<EVT, 4> OutVTs;
-  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
-  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), 
-                OutVTs, OutsFlags, TLI);
-  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
-
-  FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), 
-    OutVTs, OutsFlags, DAG);
-  if (!FLI.CanLowerReturn) {
-    // Put in an sret pointer parameter before all the other parameters.
-    SmallVector<EVT, 1> ValueVTs;
-    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
-
-    // NOTE: Assuming that a pointer will never break down to more than one VT
-    // or one register.
-    ISD::ArgFlagsTy Flags;
-    Flags.setSRet();
-    EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
-    ISD::InputArg RetArg(Flags, RegisterVT, true);
-    Ins.push_back(RetArg);
-  }
-
-  // Set up the incoming argument description vector.
-  unsigned Idx = 1;
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
-       I != E; ++I, ++Idx) {
-    SmallVector<EVT, 4> ValueVTs;
-    ComputeValueVTs(TLI, I->getType(), ValueVTs);
-    bool isArgValueUsed = !I->use_empty();
-    for (unsigned Value = 0, NumValues = ValueVTs.size();
-         Value != NumValues; ++Value) {
-      EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
-      ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment =
-        TD->getABITypeAlignment(ArgTy);
-
-      if (F.paramHasAttr(Idx, Attribute::ZExt))
-        Flags.setZExt();
-      if (F.paramHasAttr(Idx, Attribute::SExt))
-        Flags.setSExt();
-      if (F.paramHasAttr(Idx, Attribute::InReg))
-        Flags.setInReg();
-      if (F.paramHasAttr(Idx, Attribute::StructRet))
-        Flags.setSRet();
-      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
-        Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(I->getType());
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
-        // For ByVal, alignment should be passed from FE.  BE will guess if
-        // this info is not there but there are cases it cannot get right.
-        if (F.getParamAlignment(Idx))
-          FrameAlign = F.getParamAlignment(Idx);
-        Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
-      }
-      if (F.paramHasAttr(Idx, Attribute::Nest))
-        Flags.setNest();
-      Flags.setOrigAlign(OriginalAlignment);
-
-      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
-      for (unsigned i = 0; i != NumRegs; ++i) {
-        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
-        if (NumRegs > 1 && i == 0)
-          MyFlags.Flags.setSplit();
-        // if it isn't first piece, alignment must be 1
-        else if (i > 0)
-          MyFlags.Flags.setOrigAlign(1);
-        Ins.push_back(MyFlags);
-      }
-    }
-  }
-
-  // Call the target to set up the argument values.
-  SmallVector<SDValue, 8> InVals;
-  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
-                                             F.isVarArg(), Ins,
-                                             dl, DAG, InVals);
-
-  // Verify that the target's LowerFormalArguments behaved as expected.
-  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
-         "LowerFormalArguments didn't return a valid chain!");
-  assert(InVals.size() == Ins.size() &&
-         "LowerFormalArguments didn't emit the correct number of values!");
-  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-          assert(InVals[i].getNode() &&
-                 "LowerFormalArguments emitted a null value!");
-          assert(Ins[i].VT == InVals[i].getValueType() &&
-                 "LowerFormalArguments emitted a value with the wrong type!");
-        });
-
-  // Update the DAG with the new chain value resulting from argument lowering.
-  DAG.setRoot(NewRoot);
-
-  // Set up the argument values.
-  unsigned i = 0;
-  Idx = 1;
-  if (!FLI.CanLowerReturn) {
-    // Create a virtual register for the sret pointer, and put in a copy
-    // from the sret argument into it.
-    SmallVector<EVT, 1> ValueVTs;
-    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
-    EVT VT = ValueVTs[0];
-    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
-    ISD::NodeType AssertOp = ISD::DELETED_NODE;
-    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
-                                        VT, AssertOp);
-
-    MachineFunction& MF = SDL->DAG.getMachineFunction();
-    MachineRegisterInfo& RegInfo = MF.getRegInfo();
-    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
-    FLI.DemoteRegister = SRetReg;
-    NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue);
-    DAG.setRoot(NewRoot);
-    
-    // i indexes lowered arguments.  Bump it past the hidden sret argument.
-    // Idx indexes LLVM arguments.  Don't touch it.
-    ++i;
-  }
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
-      ++I, ++Idx) {
-    SmallVector<SDValue, 4> ArgValues;
-    SmallVector<EVT, 4> ValueVTs;
-    ComputeValueVTs(TLI, I->getType(), ValueVTs);
-    unsigned NumValues = ValueVTs.size();
-    for (unsigned Value = 0; Value != NumValues; ++Value) {
-      EVT VT = ValueVTs[Value];
-      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
-
-      if (!I->use_empty()) {
-        ISD::NodeType AssertOp = ISD::DELETED_NODE;
-        if (F.paramHasAttr(Idx, Attribute::SExt))
-          AssertOp = ISD::AssertSext;
-        else if (F.paramHasAttr(Idx, Attribute::ZExt))
-          AssertOp = ISD::AssertZext;
-
-        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
-                                             PartVT, VT, AssertOp));
-      }
-      i += NumParts;
-    }
-    if (!I->use_empty()) {
-      SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
-                                          SDL->getCurDebugLoc()));
-      // If this argument is live outside of the entry block, insert a copy from
-      // whereever we got it to the vreg that other BB's will reference it as.
-      SDL->CopyToExportRegsIfNeeded(I);
-    }
-  }
-  assert(i == InVals.size() && "Argument register count mismatch!");
-
-  // Finally, if the target has anything special to do, allow it to do so.
-  // FIXME: this should insert code into the DAG!
-  EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction());
-}
-
-/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
-/// ensure constants are generated when needed.  Remember the virtual registers
-/// that need to be added to the Machine PHI nodes as input.  We cannot just
-/// directly add them, because expansion might result in multiple MBB's for one
-/// BB.  As such, the start of the BB might correspond to a different MBB than
-/// the end.
-///
-void
-SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
-  TerminatorInst *TI = LLVMBB->getTerminator();
-
-  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
-
-  // Check successor nodes' PHI nodes that expect a constant to be available
-  // from this block.
-  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
-    BasicBlock *SuccBB = TI->getSuccessor(succ);
-    if (!isa<PHINode>(SuccBB->begin())) continue;
-    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
-
-    // If this terminator has multiple identical successors (common for
-    // switches), only handle each succ once.
-    if (!SuccsHandled.insert(SuccMBB)) continue;
-
-    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
-    PHINode *PN;
-
-    // At this point we know that there is a 1-1 correspondence between LLVM PHI
-    // nodes and Machine PHI nodes, but the incoming operands have not been
-    // emitted yet.
-    for (BasicBlock::iterator I = SuccBB->begin();
-         (PN = dyn_cast<PHINode>(I)); ++I) {
-      // Ignore dead phi's.
-      if (PN->use_empty()) continue;
-
-      unsigned Reg;
-      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
-
-      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
-        unsigned &RegOut = SDL->ConstantsOut[C];
-        if (RegOut == 0) {
-          RegOut = FuncInfo->CreateRegForValue(C);
-          SDL->CopyValueToVirtualRegister(C, RegOut);
-        }
-        Reg = RegOut;
-      } else {
-        Reg = FuncInfo->ValueMap[PHIOp];
-        if (Reg == 0) {
-          assert(isa<AllocaInst>(PHIOp) &&
-                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
-                 "Didn't codegen value into a register!??");
-          Reg = FuncInfo->CreateRegForValue(PHIOp);
-          SDL->CopyValueToVirtualRegister(PHIOp, Reg);
-        }
-      }
-
-      // Remember that this register needs to added to the machine PHI node as
-      // the input for this MBB.
-      SmallVector<EVT, 4> ValueVTs;
-      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
-      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
-        EVT VT = ValueVTs[vti];
-        unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
-        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
-          SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
-        Reg += NumRegisters;
-      }
-    }
-  }
-  SDL->ConstantsOut.clear();
-}
-
-/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
-/// supports legal types, and it emits MachineInstrs directly instead of
-/// creating SelectionDAG nodes.
-///
-bool
-SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
-                                                      FastISel *F) {
-  TerminatorInst *TI = LLVMBB->getTerminator();
-
-  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
-  unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size();
-
-  // Check successor nodes' PHI nodes that expect a constant to be available
-  // from this block.
-  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
-    BasicBlock *SuccBB = TI->getSuccessor(succ);
-    if (!isa<PHINode>(SuccBB->begin())) continue;
-    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
-
-    // If this terminator has multiple identical successors (common for
-    // switches), only handle each succ once.
-    if (!SuccsHandled.insert(SuccMBB)) continue;
-
-    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
-    PHINode *PN;
-
-    // At this point we know that there is a 1-1 correspondence between LLVM PHI
-    // nodes and Machine PHI nodes, but the incoming operands have not been
-    // emitted yet.
-    for (BasicBlock::iterator I = SuccBB->begin();
-         (PN = dyn_cast<PHINode>(I)); ++I) {
-      // Ignore dead phi's.
-      if (PN->use_empty()) continue;
-
-      // Only handle legal types. Two interesting things to note here. First,
-      // by bailing out early, we may leave behind some dead instructions,
-      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
-      // own moves. Second, this check is necessary becuase FastISel doesn't
-      // use CreateRegForValue to create registers, so it always creates
-      // exactly one register for each non-void instruction.
-      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
-      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
-        // Promote MVT::i1.
-        if (VT == MVT::i1)
-          VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
-        else {
-          SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
-          return false;
-        }
-      }
-
-      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
-
-      unsigned Reg = F->getRegForValue(PHIOp);
-      if (Reg == 0) {
-        SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
-        return false;
-      }
-      SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
-    }
-  }
-
-  return true;
-}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
deleted file mode 100644
index 10f256c153060..0000000000000
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ /dev/null
@@ -1,579 +0,0 @@
-//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements routines for translating from LLVM IR into SelectionDAG IR.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SELECTIONDAGBUILD_H
-#define SELECTIONDAGBUILD_H
-
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/DenseMap.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <vector>
-#include <set>
-
-namespace llvm {
-
-class AliasAnalysis;
-class AllocaInst;
-class BasicBlock;
-class BitCastInst;
-class BranchInst;
-class CallInst;
-class ExtractElementInst;
-class ExtractValueInst;
-class FCmpInst;
-class FPExtInst;
-class FPToSIInst;
-class FPToUIInst;
-class FPTruncInst;
-class Function;
-class GetElementPtrInst;
-class GCFunctionInfo;
-class ICmpInst;
-class IntToPtrInst;
-class IndirectBrInst;
-class InvokeInst;
-class InsertElementInst;
-class InsertValueInst;
-class Instruction;
-class LoadInst;
-class MachineBasicBlock;
-class MachineFunction;
-class MachineInstr;
-class MachineModuleInfo;
-class MachineRegisterInfo;
-class PHINode;
-class PtrToIntInst;
-class ReturnInst;
-class SDISelAsmOperandInfo;
-class SExtInst;
-class SelectInst;
-class ShuffleVectorInst;
-class SIToFPInst;
-class StoreInst;
-class SwitchInst;
-class TargetData;
-class TargetLowering;
-class TruncInst;
-class UIToFPInst;
-class UnreachableInst;
-class UnwindInst;
-class VAArgInst;
-class ZExtInst;
-
-//===--------------------------------------------------------------------===//
-/// FunctionLoweringInfo - This contains information that is global to a
-/// function that is used when lowering a region of the function.
-///
-class FunctionLoweringInfo {
-public:
-  TargetLowering &TLI;
-  Function *Fn;
-  MachineFunction *MF;
-  MachineRegisterInfo *RegInfo;
-
-  /// CanLowerReturn - true iff the function's return value can be lowered to
-  /// registers.
-  bool CanLowerReturn;
-
-  /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
-  /// allocated to hold a pointer to the hidden sret parameter.
-  unsigned DemoteRegister;
-
-  explicit FunctionLoweringInfo(TargetLowering &TLI);
-
-  /// set - Initialize this FunctionLoweringInfo with the given Function
-  /// and its associated MachineFunction.
-  ///
-  void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG,
-           bool EnableFastISel);
-
-  /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
-  DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
-
-  /// ValueMap - Since we emit code for the function a basic block at a time,
-  /// we must remember which virtual registers hold the values for
-  /// cross-basic-block values.
-  DenseMap<const Value*, unsigned> ValueMap;
-
-  /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
-  /// the entry block.  This allows the allocas to be efficiently referenced
-  /// anywhere in the function.
-  DenseMap<const AllocaInst*, int> StaticAllocaMap;
-
-#ifndef NDEBUG
-  SmallSet<Instruction*, 8> CatchInfoLost;
-  SmallSet<Instruction*, 8> CatchInfoFound;
-#endif
-
-  unsigned MakeReg(EVT VT);
-  
-  /// isExportedInst - Return true if the specified value is an instruction
-  /// exported from its block.
-  bool isExportedInst(const Value *V) {
-    return ValueMap.count(V);
-  }
-
-  unsigned CreateRegForValue(const Value *V);
-  
-  unsigned InitializeRegForValue(const Value *V) {
-    unsigned &R = ValueMap[V];
-    assert(R == 0 && "Already initialized this value register!");
-    return R = CreateRegForValue(V);
-  }
-  
-  struct LiveOutInfo {
-    unsigned NumSignBits;
-    APInt KnownOne, KnownZero;
-    LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
-  };
-  
-  /// LiveOutRegInfo - Information about live out vregs, indexed by their
-  /// register number offset by 'FirstVirtualRegister'.
-  std::vector<LiveOutInfo> LiveOutRegInfo;
-
-  /// clear - Clear out all the function-specific state. This returns this
-  /// FunctionLoweringInfo to an empty state, ready to be used for a
-  /// different function.
-  void clear() {
-    MBBMap.clear();
-    ValueMap.clear();
-    StaticAllocaMap.clear();
-#ifndef NDEBUG
-    CatchInfoLost.clear();
-    CatchInfoFound.clear();
-#endif
-    LiveOutRegInfo.clear();
-  }
-};
-
-//===----------------------------------------------------------------------===//
-/// SelectionDAGLowering - This is the common target-independent lowering
-/// implementation that is parameterized by a TargetLowering object.
-/// Also, targets can overload any lowering method.
-///
-class SelectionDAGLowering {
-  MachineBasicBlock *CurMBB;
-
-  /// CurDebugLoc - current file + line number.  Changes as we build the DAG.
-  DebugLoc CurDebugLoc;
-
-  DenseMap<const Value*, SDValue> NodeMap;
-
-  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
-  /// them up and then emit token factor nodes when possible.  This allows us to
-  /// get simple disambiguation between loads without worrying about alias
-  /// analysis.
-  SmallVector<SDValue, 8> PendingLoads;
-
-  /// PendingExports - CopyToReg nodes that copy values to virtual registers
-  /// for export to other blocks need to be emitted before any terminator
-  /// instruction, but they have no other ordering requirements. We bunch them
-  /// up and the emit a single tokenfactor for them just before terminator
-  /// instructions.
-  SmallVector<SDValue, 8> PendingExports;
-
-  /// Case - A struct to record the Value for a switch case, and the
-  /// case's target basic block.
-  struct Case {
-    Constant* Low;
-    Constant* High;
-    MachineBasicBlock* BB;
-
-    Case() : Low(0), High(0), BB(0) { }
-    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
-      Low(low), High(high), BB(bb) { }
-    APInt size() const {
-      const APInt &rHigh = cast<ConstantInt>(High)->getValue();
-      const APInt &rLow  = cast<ConstantInt>(Low)->getValue();
-      return (rHigh - rLow + 1ULL);
-    }
-  };
-
-  struct CaseBits {
-    uint64_t Mask;
-    MachineBasicBlock* BB;
-    unsigned Bits;
-
-    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
-      Mask(mask), BB(bb), Bits(bits) { }
-  };
-
-  typedef std::vector<Case>           CaseVector;
-  typedef std::vector<CaseBits>       CaseBitsVector;
-  typedef CaseVector::iterator        CaseItr;
-  typedef std::pair<CaseItr, CaseItr> CaseRange;
-
-  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
-  /// of conditional branches.
-  struct CaseRec {
-    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
-    CaseBB(bb), LT(lt), GE(ge), Range(r) {}
-
-    /// CaseBB - The MBB in which to emit the compare and branch
-    MachineBasicBlock *CaseBB;
-    /// LT, GE - If nonzero, we know the current case value must be less-than or
-    /// greater-than-or-equal-to these Constants.
-    Constant *LT;
-    Constant *GE;
-    /// Range - A pair of iterators representing the range of case values to be
-    /// processed at this point in the binary search tree.
-    CaseRange Range;
-  };
-
-  typedef std::vector<CaseRec> CaseRecVector;
-
-  /// The comparison function for sorting the switch case values in the vector.
-  /// WARNING: Case ranges should be disjoint!
-  struct CaseCmp {
-    bool operator () (const Case& C1, const Case& C2) {
-      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
-      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
-      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
-      return CI1->getValue().slt(CI2->getValue());
-    }
-  };
-
-  struct CaseBitsCmp {
-    bool operator () (const CaseBits& C1, const CaseBits& C2) {
-      return C1.Bits > C2.Bits;
-    }
-  };
-
-  size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
-
-  /// CaseBlock - This structure is used to communicate between SDLowering and
-  /// SDISel for the code generation of additional basic blocks needed by multi-
-  /// case switch statements.
-  struct CaseBlock {
-    CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
-              MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
-              MachineBasicBlock *me)
-      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
-        TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
-    // CC - the condition code to use for the case block's setcc node
-    ISD::CondCode CC;
-    // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
-    // Emit by default LHS op RHS. MHS is used for range comparisons:
-    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
-    Value *CmpLHS, *CmpMHS, *CmpRHS;
-    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
-    MachineBasicBlock *TrueBB, *FalseBB;
-    // ThisBB - the block into which to emit the code for the setcc and branches
-    MachineBasicBlock *ThisBB;
-  };
-  struct JumpTable {
-    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
-              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-  
-    /// Reg - the virtual register containing the index of the jump table entry
-    //. to jump to.
-    unsigned Reg;
-    /// JTI - the JumpTableIndex for this jump table in the function.
-    unsigned JTI;
-    /// MBB - the MBB into which to emit the code for the indirect jump.
-    MachineBasicBlock *MBB;
-    /// Default - the MBB of the default bb, which is a successor of the range
-    /// check MBB.  This is when updating PHI nodes in successors.
-    MachineBasicBlock *Default;
-  };
-  struct JumpTableHeader {
-    JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
-                    bool E = false):
-      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
-    APInt First;
-    APInt Last;
-    Value *SValue;
-    MachineBasicBlock *HeaderBB;
-    bool Emitted;
-  };
-  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
-
-  struct BitTestCase {
-    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
-      Mask(M), ThisBB(T), TargetBB(Tr) { }
-    uint64_t Mask;
-    MachineBasicBlock* ThisBB;
-    MachineBasicBlock* TargetBB;
-  };
-
-  typedef SmallVector<BitTestCase, 3> BitTestInfo;
-
-  struct BitTestBlock {
-    BitTestBlock(APInt F, APInt R, Value* SV,
-                 unsigned Rg, bool E,
-                 MachineBasicBlock* P, MachineBasicBlock* D,
-                 const BitTestInfo& C):
-      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
-      Parent(P), Default(D), Cases(C) { }
-    APInt First;
-    APInt Range;
-    Value  *SValue;
-    unsigned Reg;
-    bool Emitted;
-    MachineBasicBlock *Parent;
-    MachineBasicBlock *Default;
-    BitTestInfo Cases;
-  };
-
-public:
-  // TLI - This is information that describes the available target features we
-  // need for lowering.  This indicates when operations are unavailable,
-  // implemented with a libcall, etc.
-  TargetLowering &TLI;
-  SelectionDAG &DAG;
-  const TargetData *TD;
-  AliasAnalysis *AA;
-
-  /// SwitchCases - Vector of CaseBlock structures used to communicate
-  /// SwitchInst code generation information.
-  std::vector<CaseBlock> SwitchCases;
-  /// JTCases - Vector of JumpTable structures used to communicate
-  /// SwitchInst code generation information.
-  std::vector<JumpTableBlock> JTCases;
-  /// BitTestCases - Vector of BitTestBlock structures used to communicate
-  /// SwitchInst code generation information.
-  std::vector<BitTestBlock> BitTestCases;
-
-  /// PHINodesToUpdate - A list of phi instructions whose operand list will
-  /// be updated after processing the current basic block.
-  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
-
-  /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
-  /// scheduler custom lowering), track the change here.
-  DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
-
-  // Emit PHI-node-operand constants only once even if used by multiple
-  // PHI nodes.
-  DenseMap<Constant*, unsigned> ConstantsOut;
-
-  /// FuncInfo - Information about the function as a whole.
-  ///
-  FunctionLoweringInfo &FuncInfo;
-
-  /// OptLevel - What optimization level we're generating code for.
-  /// 
-  CodeGenOpt::Level OptLevel;
-  
-  /// GFI - Garbage collection metadata for the function.
-  GCFunctionInfo *GFI;
-
-  /// HasTailCall - This is set to true if a call in the current
-  /// block has been translated as a tail call. In this case,
-  /// no subsequent DAG nodes should be created.
-  ///
-  bool HasTailCall;
-
-  LLVMContext *Context;
-
-  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
-                       FunctionLoweringInfo &funcinfo,
-                       CodeGenOpt::Level ol)
-    : CurDebugLoc(DebugLoc::getUnknownLoc()), 
-      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
-      HasTailCall(false),
-      Context(dag.getContext()) {
-  }
-
-  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
-
-  /// clear - Clear out the curret SelectionDAG and the associated
-  /// state and prepare this SelectionDAGLowering object to be used
-  /// for a new block. This doesn't clear out information about
-  /// additional blocks that are needed to complete switch lowering
-  /// or PHI node updating; that information is cleared out as it is
-  /// consumed.
-  void clear();
-
-  /// getRoot - Return the current virtual root of the Selection DAG,
-  /// flushing any PendingLoad items. This must be done before emitting
-  /// a store or any other node that may need to be ordered after any
-  /// prior load instructions.
-  ///
-  SDValue getRoot();
-
-  /// getControlRoot - Similar to getRoot, but instead of flushing all the
-  /// PendingLoad items, flush all the PendingExports items. It is necessary
-  /// to do this before emitting a terminator instruction.
-  ///
-  SDValue getControlRoot();
-
-  DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-  void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
-
-  void CopyValueToVirtualRegister(Value *V, unsigned Reg);
-
-  void visit(Instruction &I);
-
-  void visit(unsigned Opcode, User &I);
-
-  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
-
-  SDValue getValue(const Value *V);
-
-  void setValue(const Value *V, SDValue NewN) {
-    SDValue &N = NodeMap[V];
-    assert(N.getNode() == 0 && "Already set a value for this node!");
-    N = NewN;
-  }
-  
-  void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
-                            std::set<unsigned> &OutputRegs, 
-                            std::set<unsigned> &InputRegs);
-
-  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
-                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
-                            unsigned Opc);
-  void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
-                                    MachineBasicBlock *FBB,
-                                    MachineBasicBlock *CurBB);
-  bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
-  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
-  void CopyToExportRegsIfNeeded(Value *V);
-  void ExportFromCurrentBlock(Value *V);
-  void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
-                   MachineBasicBlock *LandingPad = NULL);
-
-private:
-  // Terminator instructions.
-  void visitRet(ReturnInst &I);
-  void visitBr(BranchInst &I);
-  void visitSwitch(SwitchInst &I);
-  void visitIndirectBr(IndirectBrInst &I);
-  void visitUnreachable(UnreachableInst &I) { /* noop */ }
-
-  // Helpers for visitSwitch
-  bool handleSmallSwitchRange(CaseRec& CR,
-                              CaseRecVector& WorkList,
-                              Value* SV,
-                              MachineBasicBlock* Default);
-  bool handleJTSwitchCase(CaseRec& CR,
-                          CaseRecVector& WorkList,
-                          Value* SV,
-                          MachineBasicBlock* Default);
-  bool handleBTSplitSwitchCase(CaseRec& CR,
-                               CaseRecVector& WorkList,
-                               Value* SV,
-                               MachineBasicBlock* Default);
-  bool handleBitTestsSwitchCase(CaseRec& CR,
-                                CaseRecVector& WorkList,
-                                Value* SV,
-                                MachineBasicBlock* Default);  
-public:
-  void visitSwitchCase(CaseBlock &CB);
-  void visitBitTestHeader(BitTestBlock &B);
-  void visitBitTestCase(MachineBasicBlock* NextMBB,
-                        unsigned Reg,
-                        BitTestCase &B);
-  void visitJumpTable(JumpTable &JT);
-  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
-  
-private:
-  // These all get lowered before this pass.
-  void visitInvoke(InvokeInst &I);
-  void visitUnwind(UnwindInst &I);
-
-  void visitBinary(User &I, unsigned OpCode);
-  void visitShift(User &I, unsigned Opcode);
-  void visitAdd(User &I)  { visitBinary(I, ISD::ADD); }
-  void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
-  void visitSub(User &I)  { visitBinary(I, ISD::SUB); }
-  void visitFSub(User &I);
-  void visitMul(User &I)  { visitBinary(I, ISD::MUL); }
-  void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
-  void visitURem(User &I) { visitBinary(I, ISD::UREM); }
-  void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
-  void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
-  void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
-  void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
-  void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
-  void visitAnd (User &I) { visitBinary(I, ISD::AND); }
-  void visitOr  (User &I) { visitBinary(I, ISD::OR); }
-  void visitXor (User &I) { visitBinary(I, ISD::XOR); }
-  void visitShl (User &I) { visitShift(I, ISD::SHL); }
-  void visitLShr(User &I) { visitShift(I, ISD::SRL); }
-  void visitAShr(User &I) { visitShift(I, ISD::SRA); }
-  void visitICmp(User &I);
-  void visitFCmp(User &I);
-  // Visit the conversion instructions
-  void visitTrunc(User &I);
-  void visitZExt(User &I);
-  void visitSExt(User &I);
-  void visitFPTrunc(User &I);
-  void visitFPExt(User &I);
-  void visitFPToUI(User &I);
-  void visitFPToSI(User &I);
-  void visitUIToFP(User &I);
-  void visitSIToFP(User &I);
-  void visitPtrToInt(User &I);
-  void visitIntToPtr(User &I);
-  void visitBitCast(User &I);
-
-  void visitExtractElement(User &I);
-  void visitInsertElement(User &I);
-  void visitShuffleVector(User &I);
-
-  void visitExtractValue(ExtractValueInst &I);
-  void visitInsertValue(InsertValueInst &I);
-
-  void visitGetElementPtr(User &I);
-  void visitSelect(User &I);
-
-  void visitAlloca(AllocaInst &I);
-  void visitLoad(LoadInst &I);
-  void visitStore(StoreInst &I);
-  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
-  void visitCall(CallInst &I);
-  void visitInlineAsm(CallSite CS);
-  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
-  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
-
-  void visitPow(CallInst &I);
-  void visitExp2(CallInst &I);
-  void visitExp(CallInst &I);
-  void visitLog(CallInst &I);
-  void visitLog2(CallInst &I);
-  void visitLog10(CallInst &I);
-
-  void visitVAStart(CallInst &I);
-  void visitVAArg(VAArgInst &I);
-  void visitVAEnd(CallInst &I);
-  void visitVACopy(CallInst &I);
-
-  void visitUserOp1(Instruction &I) {
-    llvm_unreachable("UserOp1 should not exist at instruction selection time!");
-  }
-  void visitUserOp2(Instruction &I) {
-    llvm_unreachable("UserOp2 should not exist at instruction selection time!");
-  }
-  
-  const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
-  const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
-};
-
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI,
-                  MachineBasicBlock *MBB);
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 0000000000000..57d89036a8089
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,5821 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+                 cl::desc("Generate low-precision inline sequences "
+                          "for some float libcalls"),
+                 cl::location(LimitFloatPrecision),
+                 cl::init(0));
+
+namespace {
+  /// RegsForValue - This struct represents the registers (physical or virtual)
+  /// that a particular set of values is assigned, and the type information about
+  /// the value. The most common situation is to represent one value at a time,
+  /// but struct or array values are handled element-wise as multiple values.
+  /// The splitting of aggregates is performed recursively, so that we never
+  /// have aggregate-typed registers. The values at this point do not necessarily
+  /// have legal types, so each value may require one or more registers of some
+  /// legal type.
+  ///
+  struct RegsForValue {
+    /// TLI - The TargetLowering object.
+    ///
+    const TargetLowering *TLI;
+
+    /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need to be promoted or synthesized from one or more registers.
+    ///
+    SmallVector<EVT, 4> ValueVTs;
+
+    /// RegVTs - The value types of the registers. This is the same size as
+    /// ValueVTs and it records, for each value, what the type of the assigned
+    /// register or registers are. (Individual values are never synthesized
+    /// from more than one type of register.)
+    ///
+    /// With virtual registers, the contents of RegVTs is redundant with TLI's
+    /// getRegisterType member function; however, with physical registers
+    /// it is necessary to have a separate record of the types.
+    ///
+    SmallVector<EVT, 4> RegVTs;
+
+    /// Regs - This list holds the registers assigned to the values.
+    /// Each legal or promoted value requires one register, and each
+    /// expanded value requires multiple registers.
+    ///
+    SmallVector<unsigned, 4> Regs;
+
+    RegsForValue() : TLI(0) {}
+
+    RegsForValue(const TargetLowering &tli,
+                 const SmallVector<unsigned, 4> &regs,
+                 EVT regvt, EVT valuevt)
+      : TLI(&tli),  ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+    RegsForValue(const TargetLowering &tli,
+                 const SmallVector<unsigned, 4> &regs,
+                 const SmallVector<EVT, 4> &regvts,
+                 const SmallVector<EVT, 4> &valuevts)
+      : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+                 unsigned Reg, const Type *Ty) : TLI(&tli) {
+      ComputeValueVTs(tli, Ty, ValueVTs);
+
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT ValueVT = ValueVTs[Value];
+        unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
+        EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
+        for (unsigned i = 0; i != NumRegs; ++i)
+          Regs.push_back(Reg + i);
+        RegVTs.push_back(RegisterVT);
+        Reg += NumRegs;
+      }
+    }
+
+    /// append - Add the specified values to this one.
+    void append(const RegsForValue &RHS) {
+      TLI = RHS.TLI;
+      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    }
+
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVTs value.  This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+                              SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object.  This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                       SDValue &Chain, SDValue *Flag) const;
+
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list.  This adds the code marker, matching input operand index
+    /// (if applicable), and includes the number of values added into it.
+    void AddInlineAsmOperands(unsigned Code,
+                              bool HasMatching, unsigned MatchingIdx,
+                              SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+  };
+}
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent.  If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+                                const SDValue *Parts,
+                                unsigned NumParts, EVT PartVT, EVT ValueVT,
+                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+  assert(NumParts > 0 && "No parts to assemble!");
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Val = Parts[0];
+
+  if (NumParts > 1) {
+    // Assemble the value from multiple parts.
+    if (!ValueVT.isVector() && ValueVT.isInteger()) {
+      unsigned PartBits = PartVT.getSizeInBits();
+      unsigned ValueBits = ValueVT.getSizeInBits();
+
+      // Assemble the power of 2 part.
+      unsigned RoundParts = NumParts & (NumParts - 1) ?
+        1 << Log2_32(NumParts) : NumParts;
+      unsigned RoundBits = PartBits * RoundParts;
+      EVT RoundVT = RoundBits == ValueBits ?
+        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+      SDValue Lo, Hi;
+
+      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+      if (RoundParts > 2) {
+        Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
+        Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2,
+                              PartVT, HalfVT);
+      } else {
+        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
+        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+      }
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+
+      if (RoundParts < NumParts) {
+        // Assemble the trailing non-power-of-2 part.
+        unsigned OddParts = NumParts - RoundParts;
+        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+        Hi = getCopyFromParts(DAG, dl,
+                              Parts+RoundParts, OddParts, PartVT, OddVT);
+
+        // Combine the round and odd parts.
+        Lo = Val;
+        if (TLI.isBigEndian())
+          std::swap(Lo, Hi);
+        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
+        Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
+                                         TLI.getPointerTy()));
+        Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
+        Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
+      }
+    } else if (ValueVT.isVector()) {
+      // Handle a multi-element vector.
+      EVT IntermediateVT, RegisterVT;
+      unsigned NumIntermediates;
+      unsigned NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 
+                                   NumIntermediates, RegisterVT);
+      assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+      NumParts = NumRegs; // Silence a compiler warning.
+      assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+      assert(RegisterVT == Parts[0].getValueType() &&
+             "Part type doesn't match part!");
+
+      // Assemble the parts into intermediate operands.
+      SmallVector<SDValue, 8> Ops(NumIntermediates);
+      if (NumIntermediates == NumParts) {
+        // If the register was not expanded, truncate or copy the value,
+        // as appropriate.
+        for (unsigned i = 0; i != NumParts; ++i)
+          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
+                                    PartVT, IntermediateVT);
+      } else if (NumParts > 0) {
+        // If the intermediate type was expanded, build the intermediate operands
+        // from the parts.
+        assert(NumParts % NumIntermediates == 0 &&
+               "Must expand into a divisible number of parts!");
+        unsigned Factor = NumParts / NumIntermediates;
+        for (unsigned i = 0; i != NumIntermediates; ++i)
+          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
+                                    PartVT, IntermediateVT);
+      }
+
+      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
+      // operands.
+      Val = DAG.getNode(IntermediateVT.isVector() ?
+                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
+                        ValueVT, &Ops[0], NumIntermediates);
+    } else if (PartVT.isFloatingPoint()) {
+      // FP split into multiple FP parts (for ppcf128)
+      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
+             "Unexpected split");
+      SDValue Lo, Hi;
+      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
+      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+    } else {
+      // FP split into integer parts (soft fp)
+      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+             !PartVT.isVector() && "Unexpected split");
+      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+    }
+  }
+
+  // There is now one part, held in Val.  Correct it to match ValueVT.
+  PartVT = Val.getValueType();
+
+  if (PartVT == ValueVT)
+    return Val;
+
+  if (PartVT.isVector()) {
+    assert(ValueVT.isVector() && "Unknown vector conversion!");
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+  }
+
+  if (ValueVT.isVector()) {
+    assert(ValueVT.getVectorElementType() == PartVT &&
+           ValueVT.getVectorNumElements() == 1 &&
+           "Only trivial scalar-to-vector conversions should get here!");
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
+  }
+
+  if (PartVT.isInteger() &&
+      ValueVT.isInteger()) {
+    if (ValueVT.bitsLT(PartVT)) {
+      // For a truncate, see if we have any information to
+      // indicate whether the truncated bits will always be
+      // zero or sign-extension.
+      if (AssertOp != ISD::DELETED_NODE)
+        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+                          DAG.getValueType(ValueVT));
+      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+    } else {
+      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+    }
+  }
+
+  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+    if (ValueVT.bitsLT(Val.getValueType()))
+      // FP_ROUND's are always exact here.
+      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+                         DAG.getIntPtrConstant(1));
+    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+  }
+
+  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
+    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+
+  llvm_unreachable("Unknown mismatch!");
+  return SDValue();
+}
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts.  If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
+                           SDValue *Parts, unsigned NumParts, EVT PartVT,
+                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT PtrVT = TLI.getPointerTy();
+  EVT ValueVT = Val.getValueType();
+  unsigned PartBits = PartVT.getSizeInBits();
+  unsigned OrigNumParts = NumParts;
+  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
+
+  if (!NumParts)
+    return;
+
+  if (!ValueVT.isVector()) {
+    if (PartVT == ValueVT) {
+      assert(NumParts == 1 && "No-op copy with multiple parts!");
+      Parts[0] = Val;
+      return;
+    }
+
+    if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+      // If the parts cover more bits than the value has, promote the value.
+      if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+        assert(NumParts == 1 && "Do not know what to promote to!");
+        Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
+      } else if (PartVT.isInteger() && ValueVT.isInteger()) {
+        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
+      } else {
+        llvm_unreachable("Unknown mismatch!");
+      }
+    } else if (PartBits == ValueVT.getSizeInBits()) {
+      // Different types of the same size.
+      assert(NumParts == 1 && PartVT != ValueVT);
+      Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+      // If the parts cover fewer bits than the value has, truncate the value.
+      if (PartVT.isInteger() && ValueVT.isInteger()) {
+        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+        Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+      } else {
+        llvm_unreachable("Unknown mismatch!");
+      }
+    }
+
+    // The value may have changed - recompute ValueVT.
+    ValueVT = Val.getValueType();
+    assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+           "Failed to tile the value with PartVT!");
+
+    if (NumParts == 1) {
+      assert(PartVT == ValueVT && "Type conversion failed!");
+      Parts[0] = Val;
+      return;
+    }
+
+    // Expand the value into multiple parts.
+    if (NumParts & (NumParts - 1)) {
+      // The number of parts is not a power of 2.  Split off and copy the tail.
+      assert(PartVT.isInteger() && ValueVT.isInteger() &&
+             "Do not know what to expand to!");
+      unsigned RoundParts = 1 << Log2_32(NumParts);
+      unsigned RoundBits = RoundParts * PartBits;
+      unsigned OddParts = NumParts - RoundParts;
+      SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
+                                   DAG.getConstant(RoundBits,
+                                                   TLI.getPointerTy()));
+      getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT);
+      if (TLI.isBigEndian())
+        // The odd parts were reversed by getCopyToParts - unreverse them.
+        std::reverse(Parts + RoundParts, Parts + NumParts);
+      NumParts = RoundParts;
+      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+      Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+    }
+
+    // The number of parts is a power of 2.  Repeatedly bisect the value using
+    // EXTRACT_ELEMENT.
+    Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
+                           EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()),
+                           Val);
+    for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+      for (unsigned i = 0; i < NumParts; i += StepSize) {
+        unsigned ThisBits = StepSize * PartBits / 2;
+        EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+        SDValue &Part0 = Parts[i];
+        SDValue &Part1 = Parts[i+StepSize/2];
+
+        Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+                            ThisVT, Part0,
+                            DAG.getConstant(1, PtrVT));
+        Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+                            ThisVT, Part0,
+                            DAG.getConstant(0, PtrVT));
+
+        if (ThisBits == PartBits && ThisVT != PartVT) {
+          Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
+                                                PartVT, Part0);
+          Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
+                                                PartVT, Part1);
+        }
+      }
+    }
+
+    if (TLI.isBigEndian())
+      std::reverse(Parts, Parts + OrigNumParts);
+
+    return;
+  }
+
+  // Vector ValueVT.
+  if (NumParts == 1) {
+    if (PartVT != ValueVT) {
+      if (PartVT.isVector()) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
+      } else {
+        assert(ValueVT.getVectorElementType() == PartVT &&
+               ValueVT.getVectorNumElements() == 1 &&
+               "Only trivial vector-to-scalar conversions should get here!");
+        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                          PartVT, Val,
+                          DAG.getConstant(0, PtrVT));
+      }
+    }
+
+    Parts[0] = Val;
+    return;
+  }
+
+  // Handle a multi-element vector.
+  EVT IntermediateVT, RegisterVT;
+  unsigned NumIntermediates;
+  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+                              IntermediateVT, NumIntermediates, RegisterVT);
+  unsigned NumElements = ValueVT.getVectorNumElements();
+
+  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+  NumParts = NumRegs; // Silence a compiler warning.
+  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+  // Split the vector into intermediate operands.
+  SmallVector<SDValue, 8> Ops(NumIntermediates);
+  for (unsigned i = 0; i != NumIntermediates; ++i)
+    if (IntermediateVT.isVector())
+      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i * (NumElements / NumIntermediates),
+                                           PtrVT));
+    else
+      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                           IntermediateVT, Val,
+                           DAG.getConstant(i, PtrVT));
+
+  // Split the intermediate operands into legal parts.
+  if (NumParts == NumIntermediates) {
+    // If the register was not expanded, promote or copy the value,
+    // as appropriate.
+    for (unsigned i = 0; i != NumParts; ++i)
+      getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+  } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each the value into
+    // legal parts.
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT);
+  }
+}
+
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+  TD = DAG.getTarget().getTargetData();  // taken from the DAG's target
+  GFI = gfi;                             // GC info supplied by the caller
+  AA = &aa;                              // alias analysis supplied by the caller
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+  NodeMap.clear();
+  PendingLoads.clear();
+  PendingExports.clear();
+  EdgeMapping.clear();
+  DAG.clear();
+  CurDebugLoc = DebugLoc::getUnknownLoc();  // back to "no location known"
+  HasTailCall = false;
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG after
+/// folding every entry of PendingLoads into it (through a TokenFactor when
+/// there is more than one).  Call this before emitting a store or any other
+/// node that has to be ordered after the loads seen so far.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+  const unsigned NumPending = PendingLoads.size();
+
+  // Nothing to flush: hand back the DAG root as it is.
+  if (NumPending == 0)
+    return DAG.getRoot();
+
+  // A lone pending load becomes the root itself; several are joined by a
+  // token factor node.
+  SDValue NewRoot = PendingLoads[0];
+  if (NumPending > 1)
+    NewRoot = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                          &PendingLoads[0], NumPending);
+
+  PendingLoads.clear();
+  DAG.setRoot(NewRoot);
+  return NewRoot;
+}
+
+/// getControlRoot - Like getRoot, except that it is the PendingExports
+/// items (rather than the PendingLoads items) that get flushed into the
+/// root.  This has to happen before a terminator instruction is emitted.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+  SDValue Root = DAG.getRoot();
+  if (PendingExports.empty())
+    return Root;
+
+  // Unless the root is just the entry token, it must be factored in as well,
+  // but only when no pending export already has it as its first operand.
+  if (Root.getOpcode() != ISD::EntryToken) {
+    bool ChainedToRoot = false;
+    for (unsigned Idx = 0, End = PendingExports.size();
+         Idx != End && !ChainedToRoot; ++Idx) {
+      SDNode *Export = PendingExports[Idx].getNode();
+      assert(Export->getNumOperands() > 1);
+      ChainedToRoot = Export->getOperand(0) == Root;
+    }
+    if (!ChainedToRoot)
+      PendingExports.push_back(Root);
+  }
+
+  // Turn all of the CopyToReg chains into one factored node.
+  SDValue Factored = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                                 &PendingExports[0], PendingExports.size());
+  PendingExports.clear();
+  DAG.setRoot(Factored);
+  return Factored;
+}
+
+void SelectionDAGBuilder::visit(Instruction &I) {
+  visit(I.getOpcode(), I);  // forward to the opcode-based overload
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+  // Note: this dispatches on Opcode by hand instead of using InstVisitor,
+  // because it has to work with ConstantExpr's in addition to instructions.
+  switch (Opcode) {
+  default: llvm_unreachable("Unknown instruction type encountered!");
+    // Build the cases from Instruction.def: each one calls visit<OPCODE>.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+  case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
+#include "llvm/Instruction.def"
+  }
+}
+
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+  SDValue &N = NodeMap[V];  // slot caching the node built for V
+  if (N.getNode()) return N;
+  // Not built yet.  Constants are materialized below according to their kind.
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    EVT VT = TLI.getValueType(V->getType(), true);
+
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+      return N = DAG.getConstant(*CI, VT);
+
+    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
+      return N = DAG.getGlobalAddress(GV, VT);
+
+    if (isa<ConstantPointerNull>(C))
+      return N = DAG.getConstant(0, TLI.getPointerTy());
+
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+      return N = DAG.getConstantFP(*CFP, VT);
+
+    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+      return N = DAG.getUNDEF(VT);
+
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      visit(CE->getOpcode(), *CE);  // expected to record its result in NodeMap
+      SDValue N1 = NodeMap[V];
+      assert(N1.getNode() && "visit didn't populate the ValueMap!");
+      return N1;
+    }
+    // NOTE(review): &Constants[0] below assumes a non-empty list - confirm.
+    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+      SmallVector<SDValue, 4> Constants;
+      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+           OI != OE; ++OI) {
+        SDNode *Val = getValue(*OI).getNode();
+        // If the operand is an empty aggregate, there are no values.
+        if (!Val) continue;
+        // Add each leaf value from the operand to the Constants list
+        // to form a flattened list of all the values.
+        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+          Constants.push_back(SDValue(Val, i));
+      }
+      return DAG.getMergeValues(&Constants[0], Constants.size(),
+                                getCurDebugLoc());
+    }
+    // Zero or undef aggregate: build a zero/undef value for each leaf type.
+    if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+             "Unknown struct or array constant!");
+
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, C->getType(), ValueVTs);
+      unsigned NumElts = ValueVTs.size();
+      if (NumElts == 0)
+        return SDValue(); // empty struct
+      SmallVector<SDValue, 4> Constants(NumElts);
+      for (unsigned i = 0; i != NumElts; ++i) {
+        EVT EltVT = ValueVTs[i];
+        if (isa<UndefValue>(C))
+          Constants[i] = DAG.getUNDEF(EltVT);
+        else if (EltVT.isFloatingPoint())
+          Constants[i] = DAG.getConstantFP(0, EltVT);
+        else
+          Constants[i] = DAG.getConstant(0, EltVT);
+      }
+      return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
+    }
+
+    if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
+      return DAG.getBlockAddress(BA, VT);
+    // Every remaining kind of constant is handled as a vector.
+    const VectorType *VecTy = cast<VectorType>(V->getType());
+    unsigned NumElements = VecTy->getNumElements();
+
+    // Now that we know the number and type of the elements, get that number of
+    // elements into the Ops array based on what kind of constant it is.
+    SmallVector<SDValue, 16> Ops;
+    if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+      for (unsigned i = 0; i != NumElements; ++i)
+        Ops.push_back(getValue(CP->getOperand(i)));
+    } else {
+      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+      EVT EltVT = TLI.getValueType(VecTy->getElementType());
+
+      SDValue Op;
+      if (EltVT.isFloatingPoint())
+        Op = DAG.getConstantFP(0, EltVT);
+      else
+        Op = DAG.getConstant(0, EltVT);
+      Ops.assign(NumElements, Op);
+    }
+
+    // Create a BUILD_VECTOR node.
+    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                                    VT, &Ops[0], Ops.size());
+  }
+
+  // If this is a static alloca, generate it as the frameindex instead of
+  // computation.
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end())
+      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
+  }
+  // Any other value must already have a register in FuncInfo.ValueMap.
+  unsigned InReg = FuncInfo.ValueMap[V];
+  assert(InReg && "Value not in map!");
+
+  RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
+}
+
+/// getReturnInfo - Append to OutVTs and OutFlags the register type and
+/// argument flags of every register needed to return a value of ReturnType.
+/// No DAG or return value is required, so this may run before any DAG exists.
+static void getReturnInfo(const Type* ReturnType,
+                   Attributes attr, SmallVectorImpl<EVT> &OutVTs,
+                   SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
+                   TargetLowering &TLI,
+                   SmallVectorImpl<uint64_t> *Offsets = 0) {
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
+  if (ValueVTs.empty())
+    return;
+
+  // The attribute-derived state is the same for every value; compute it once.
+  const bool SignExtend = (attr & Attribute::SExt) != 0;
+  const bool ZeroExtend = !SignExtend && (attr & Attribute::ZExt) != 0;
+
+  // 'inreg' on function refers to return value
+  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+  if (attr & Attribute::InReg)
+    Flags.setInReg();
+
+  // Propagate extension type if any
+  if (SignExtend)
+    Flags.setSExt();
+  else if (ZeroExtend)
+    Flags.setZExt();
+
+  for (unsigned ValNo = 0, NumValues = ValueVTs.size(); ValNo != NumValues;
+       ++ValNo) {
+    EVT VT = ValueVTs[ValNo];
+
+    // FIXME: C calling convention requires the return type to be promoted to
+    // at least 32-bit. But this is not necessary for non-C calling
+    // conventions. The frontend should mark functions whose return values
+    // require promoting with signext or zeroext attributes.
+    if ((SignExtend || ZeroExtend) && VT.isInteger()) {
+      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+      if (VT.bitsLT(MinVT))
+        VT = MinVT;
+    }
+
+    // Every register of this value has the same type and the same flags.
+    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    for (unsigned Part = 0; Part != NumParts; ++Part) {
+      OutVTs.push_back(PartVT);
+      OutFlags.push_back(Flags);
+    }
+  }
+}
+
+void SelectionDAGBuilder::visitRet(ReturnInst &I) {
+  SDValue Chain = getControlRoot();
+  SmallVector<ISD::OutputArg, 8> Outs;
+  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+  // If the return cannot be lowered normally, store it through DemoteRegister.
+  if (!FLI.CanLowerReturn) {
+    unsigned DemoteReg = FLI.DemoteRegister;
+    const Function *F = I.getParent()->getParent();
+
+    // Emit a store of the return value through the virtual register.
+    // Leave Outs empty so that LowerReturn won't try to load return
+    // registers the usual way.
+    SmallVector<EVT, 1> PtrValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), 
+                    PtrValueVTs);
+
+    SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+    SDValue RetOp = getValue(I.getOperand(0));
+    // One store per leaf value of the returned operand, each at its own offset.
+    SmallVector<EVT, 4> ValueVTs;
+    SmallVector<uint64_t, 4> Offsets;
+    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+    unsigned NumValues = ValueVTs.size();
+
+    SmallVector<SDValue, 4> Chains(NumValues);
+    EVT PtrVT = PtrValueVTs[0];
+    for (unsigned i = 0; i != NumValues; ++i)
+      Chains[i] = DAG.getStore(Chain, getCurDebugLoc(),
+                  SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+                  DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
+                  DAG.getConstant(Offsets[i], PtrVT)),
+                  NULL, Offsets[i], false, 0);
+    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                        MVT::Other, &Chains[0], NumValues);
+  }
+  else {
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+      unsigned NumValues = ValueVTs.size();
+      if (NumValues == 0) continue;
+      // Split every leaf value of this operand into register-typed parts.
+      SDValue RetOp = getValue(I.getOperand(i));
+      for (unsigned j = 0, f = NumValues; j != f; ++j) {
+        EVT VT = ValueVTs[j];
+
+        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+        const Function *F = I.getParent()->getParent();
+        if (F->paramHasAttr(0, Attribute::SExt))
+          ExtendKind = ISD::SIGN_EXTEND;
+        else if (F->paramHasAttr(0, Attribute::ZExt))
+          ExtendKind = ISD::ZERO_EXTEND;
+
+        // FIXME: C calling convention requires the return type to be promoted to
+        // at least 32-bit. But this is not necessary for non-C calling
+        // conventions. The frontend should mark functions whose return values
+        // require promoting with signext or zeroext attributes.
+        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
+          if (VT.bitsLT(MinVT))
+            VT = MinVT;
+        }
+
+        unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+        EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+        SmallVector<SDValue, 4> Parts(NumParts);
+        getCopyToParts(DAG, getCurDebugLoc(),
+                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+                       &Parts[0], NumParts, PartVT, ExtendKind);
+
+        // 'inreg' on function refers to return value
+        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+        if (F->paramHasAttr(0, Attribute::InReg))
+          Flags.setInReg();
+
+        // Propagate extension type if any
+        if (F->paramHasAttr(0, Attribute::SExt))
+          Flags.setSExt();
+        else if (F->paramHasAttr(0, Attribute::ZExt))
+          Flags.setZExt();
+
+        for (unsigned i = 0; i < NumParts; ++i)  // shadows the outer 'i'
+          Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
+      }
+    }
+  }
+
+  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+  CallingConv::ID CallConv =
+    DAG.getMachineFunction().getFunction()->getCallingConv();
+  Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
+                          Outs, getCurDebugLoc(), DAG);
+
+  // Verify that the target's LowerReturn behaved as expected.
+  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+         "LowerReturn didn't return a valid chain!");
+
+  // Update the DAG with the new chain value resulting from return lowering.
+  DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If V is used and virtual registers have been
+/// created for it (it has an entry in FuncInfo.ValueMap), emit nodes to copy
+/// the value into those registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
+  if (V->use_empty())
+    return;
+  DenseMap<const Value *, unsigned>::iterator Slot = FuncInfo.ValueMap.find(V);
+  if (Slot != FuncInfo.ValueMap.end())
+    CopyValueToVirtualRegister(V, Slot->second);
+}
+
+/// ExportFromCurrentBlock - Make sure V is exported from the current basic
+/// block: unless it is known to be exported already, add it to ValueMap now
+/// so that we'll get a CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
+  // Only instructions and arguments need exporting; constants do not.
+  const bool NeedsExport = isa<Instruction>(V) || isa<Argument>(V);
+
+  // Nothing to do either when it has been exported already.
+  if (!NeedsExport || FuncInfo.isExportedInst(V))
+    return;
+
+  CopyValueToVirtualRegister(V, FuncInfo.InitializeRegForValue(V));
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
+                                                     const BasicBlock *FromBB) {
+  // A value is exportable when it becomes available in FromBB itself, or
+  // when it has been exported already.  Determine the former per kind of
+  // value first.
+  bool AvailableHere;
+  if (Instruction *VI = dyn_cast<Instruction>(V)) {
+    // The operands of the setcc have to be in this block.  We don't know
+    // how to export them from some other block.
+    AvailableHere = VI->getParent() == FromBB;
+  } else if (isa<Argument>(V)) {
+    // If this is an argument, we can export it if the BB is the entry block
+    // or if it is already exported.
+    AvailableHere = FromBB == &FromBB->getParent()->getEntryBlock();
+  } else {
+    // Otherwise, constants can always be exported.
+    return true;
+  }
+
+  // Can export from current BB.
+  if (AvailableHere)
+    return true;
+
+  // Is already exported, noop.
+  return FuncInfo.isExportedInst(V);
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+  // Anything that is not an instruction counts as being in every block.
+  const Instruction *Inst = dyn_cast<Instruction>(V);
+  return !Inst || Inst->getParent() == BB;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code.  The exact code (FPC) is
+/// returned unless FiniteOnlyFPMath() is set, in which case the code that
+/// does not distinguish ordered from unordered (FOC) is returned instead.
+static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
+  ISD::CondCode FPC, FOC;
+  switch (Pred) {
+  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
+  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
+  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+  default:
+    llvm_unreachable("Invalid FCmp predicate opcode!");
+    FOC = FPC = ISD::SETFALSE;  // not expected to run; keeps FOC/FPC assigned
+    break;
+  }
+  if (FiniteOnlyFPMath())
+    return FOC;
+  else
+    return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.  Signed predicates map to the
+/// plain SETxx codes and unsigned ones to the SETUxx codes.
+static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
+  switch (Pred) {
+  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
+  case ICmpInst::ICMP_NE:  return ISD::SETNE;
+  case ICmpInst::ICMP_SLE: return ISD::SETLE;
+  case ICmpInst::ICMP_ULE: return ISD::SETULE;
+  case ICmpInst::ICMP_SGE: return ISD::SETGE;
+  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+  case ICmpInst::ICMP_SLT: return ISD::SETLT;
+  case ICmpInst::ICMP_ULT: return ISD::SETULT;
+  case ICmpInst::ICMP_SGT: return ISD::SETGT;
+  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+  default:
+    llvm_unreachable("Invalid ICmp predicate opcode!");
+    return ISD::SETNE;  // not expected to run; gives this path a return value
+  }
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions,
+/// used at the leaves of an OR or an AND operator tree.  It records the
+/// branch on Cond from CurBB (to TBB or FBB) as a CaseBlock appended to
+/// SwitchCases.
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
+                                                  MachineBasicBlock *TBB,
+                                                  MachineBasicBlock *FBB,
+                                                  MachineBasicBlock *CurBB) {
+  const BasicBlock *BB = CurBB->getBasicBlock();
+
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    // The operands of the cmp have to be in this block.  We don't know
+    // how to export them from some other block.  If this is the first block
+    // of the sequence, no exporting is needed.
+    if (CurBB == CurMBB ||
+        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+      ISD::CondCode Condition;
+      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+        Condition = getICmpCondCode(IC->getPredicate());
+      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
+        Condition = getFCmpCondCode(FC->getPredicate());
+      } else {
+        Condition = ISD::SETEQ; // silence warning.
+        llvm_unreachable("Unknown compare instruction");
+      }
+
+      CaseBlock CB(Condition, BOp->getOperand(0),
+                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+      SwitchCases.push_back(CB);
+      return;
+    }
+  }
+
+  // Otherwise branch on Cond itself, expressed as the test (Cond == true).
+  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+               NULL, TBB, FBB, CurBB);
+  SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - Emit the Opc (and/or) tree rooted at Cond as branches.
+void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
+                                               MachineBasicBlock *TBB,
+                                               MachineBasicBlock *FBB,
+                                               MachineBasicBlock *CurBB,
+                                               unsigned Opc) {
+  // If this node is not part of the or/and tree, emit it as a branch.
+  Instruction *BOp = dyn_cast<Instruction>(Cond);
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
+    return;
+  }
+
+  //  Create TmpBB after CurBB; the second operand's test is emitted into it.
+  MachineFunction::iterator BBI = CurBB;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    //  This requires creation of TmpBB after CurBB.
+
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
+
+    // Emit the RHS condition into TmpBB.
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
+  }
+}
+
+/// ShouldEmitAsBranches - Return true if the set of cases should be emitted
+/// as a series of branches, false if it should be emitted as a bunch of
+/// and/or'd together conditions.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
+  if (Cases.size() != 2)
+    return true;
+  const CaseBlock &First = Cases[0];
+  const CaseBlock &Second = Cases[1];
+
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  const bool SameOrder =
+    First.CmpLHS == Second.CmpLHS && First.CmpRHS == Second.CmpRHS;
+  const bool Swapped =
+    First.CmpRHS == Second.CmpLHS && First.CmpLHS == Second.CmpRHS;
+  return !(SameOrder || Swapped);
+}
+
+void SelectionDAGBuilder::visitBr(BranchInst &I) {
+  // Look up the machine basic block of the first successor.
+  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  if (I.isUnconditional()) {
+    // Update machine-CFG edges.
+    CurMBB->addSuccessor(Succ0MBB);
+
+    // If this is not a fall-through branch, emit the branch.
+    if (Succ0MBB != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Succ0MBB)));
+    return;
+  }
+
+  // Conditional branch.  If the condition is one of the special cases we
+  // handle, do special stuff now.
+  Value *CondVal = I.getCondition();
+  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq
+  //     cmp D, E
+  //     F = setle
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  //
+  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    if (BOp->hasOneUse() &&
+        (BOp->getOpcode() == Instruction::And ||
+         BOp->getOpcode() == Instruction::Or)) {
+      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
+      // If the compares in later blocks need to use values not currently
+      // exported from this block, export them now.  This block should always
+      // be the first entry.
+      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
+
+      // Allow some cases to be rejected.
+      if (ShouldEmitAsBranches(SwitchCases)) {
+        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+        }
+
+        // Emit the branch for this block.
+        visitSwitchCase(SwitchCases[0]);
+        SwitchCases.erase(SwitchCases.begin());
+        return;
+      }
+
+      // Okay, we decided not to do this, remove any inserted MBB's (every entry
+      // but the first, which is CurMBB) and clear SwitchCases.
+      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+      SwitchCases.clear();
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+               NULL, Succ0MBB, Succ1MBB, CurMBB);
+  // Use visitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  visitSwitchCase(CB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
+  SDValue Cond;
+  SDValue CondLHS = getValue(CB.CmpLHS);
+  DebugLoc dl = getCurDebugLoc();
+
+  // Build the setcc now.
+  if (CB.CmpMHS == NULL) {
+    // Fold "(X == true)" to X and "(X == false)" to !X to
+    // handle common cases produced by branch lowering.
+    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+        CB.CC == ISD::SETEQ)
+      Cond = CondLHS;
+    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+             CB.CC == ISD::SETEQ) {
+      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
+      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+    } else
+      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+  } else {
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+    // Range check: CmpLHS/CmpRHS are the constant bounds, CmpMHS the value.
+    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+    SDValue CmpOp = getValue(CB.CmpMHS);
+    EVT VT = CmpOp.getValueType();
+    // Low is the minimum: X <= High.  Else: (X - Low) u<= (High - Low).
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
+                          ISD::SETLE);
+    } else {
+      SDValue SUB = DAG.getNode(ISD::SUB, dl,
+                                VT, CmpOp, DAG.getConstant(Low, VT));
+      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+                          DAG.getConstant(High-Low, VT), ISD::SETULE);
+    }
+  }
+
+  // Update successor info
+  CurMBB->addSuccessor(CB.TrueBB);
+  CurMBB->addSuccessor(CB.FalseBB);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == NextBlock) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    SDValue True = DAG.getConstant(1, Cond.getValueType());
+    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+  }
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                               MVT::Other, getControlRoot(), Cond,
+                               DAG.getBasicBlock(CB.TrueBB));
+
+  // If the branch was constant folded, fix up the CFG.
+  if (BrCond.getOpcode() == ISD::BR) {
+    CurMBB->removeSuccessor(CB.FalseBB);
+    DAG.setRoot(BrCond);
+  } else {
+    // Otherwise, go ahead and insert the false branch.
+    if (BrCond == getControlRoot())  // presumably the BRCOND folded to its chain
+      CurMBB->removeSuccessor(CB.TrueBB);
+
+    if (CB.FalseBB == NextBlock)
+      DAG.setRoot(BrCond);
+    else
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                              DAG.getBasicBlock(CB.FalseBB)));
+  }
+}
+
+/// visitJumpTable - Emit the BR_JT node for jump table JT in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+  // Emit the code for the jump table: read the index from JT.Reg, then BR_JT.
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  EVT PTy = TLI.getPointerTy();
+  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+                                     JT.Reg, PTy);
+  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+  DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+                          MVT::Other, Index.getValue(1),
+                          Table, Index));
+}
+
+/// visitJumpTableHeader - This function emits necessary code to produce index
+/// in the JumpTable from switch case.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+                                               JumpTableHeader &JTH) {
+  // Subtract the lowest switch case value from the value being switched on and
+  // conditional branch to default mbb if the result is greater than the
+  // difference between smallest and largest cases.
+  SDValue SwitchOp = getValue(JTH.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(JTH.First, VT));
+
+  // The SDNode we just created, which holds the value being switched on minus
+  // the smallest case value, needs to be copied to a virtual register so it
+  // can be used as an index into the jump table in a subsequent basic block.
+  // This value may be smaller or larger than the target's pointer type, and
+  // therefore require extension or truncating.
+  SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
+
+  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    JumpTableReg, SwitchOp);
+  JT.Reg = JumpTableReg;
+
+  // Emit the range check for the jump table, and branch to the default block
+  // for the switch statement if the value being switched on exceeds the largest
+  // case in the switch.
+  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
+                             TLI.getSetCCResultType(SUB.getValueType()), SUB,
+                             DAG.getConstant(JTH.Last-JTH.First,VT),
+                             ISD::SETUGT);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                               MVT::Other, CopyTo, CMP,
+                               DAG.getBasicBlock(JT.Default));
+
+  if (JT.MBB == NextBlock)
+    DAG.setRoot(BrCond);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
+                            DAG.getBasicBlock(JT.MBB)));
+}
+
+/// visitBitTestHeader - Emit the range check for a "bit tests" lowering and
+/// copy the switch value minus B.First into B.Reg for the bit test blocks.
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
+  // Subtract the minimum value
+  SDValue SwitchOp = getValue(B.SValue);
+  EVT VT = SwitchOp.getValueType();
+  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+                            DAG.getConstant(B.First, VT));
+
+  // Check range: an unsigned result above B.Range goes to the default block.
+  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
+                                  TLI.getSetCCResultType(SUB.getValueType()),
+                                  SUB, DAG.getConstant(B.Range, VT),
+                                  ISD::SETUGT);
+
+  SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
+
+  B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+                                    B.Reg, ShiftOp);
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+  CurMBB->addSuccessor(B.Default);
+  CurMBB->addSuccessor(MBB);
+
+  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                                MVT::Other, CopyTo, RangeCmp,
+                                DAG.getBasicBlock(B.Default));
+  // Chain the BR after BrRange; chaining on CopyTo would drop the range check.
+  if (MBB == NextBlock)
+    DAG.setRoot(BrRange);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrRange,
+                            DAG.getBasicBlock(MBB)));
+}
+
+/// visitBitTestCase - Emit one "bit test": branch on (1 << Reg) & B.Mask.
+void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+                                           unsigned Reg,
+                                           BitTestCase &B) {
+  // Make desired shift: 1 << (the value copied out of Reg).
+  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
+                                       TLI.getPointerTy());
+  SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                                  TLI.getPointerTy(),
+                                  DAG.getConstant(1, TLI.getPointerTy()),
+                                  ShiftOp);
+
+  // Emit bit tests and jumps: go to B.TargetBB when a B.Mask bit is set.
+  SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                              TLI.getPointerTy(), SwitchVal,
+                              DAG.getConstant(B.Mask, TLI.getPointerTy()));
+  SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
+                                TLI.getSetCCResultType(AndOp.getValueType()),
+                                AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+                                ISD::SETNE);
+
+  CurMBB->addSuccessor(B.TargetBB);
+  CurMBB->addSuccessor(NextMBB);
+
+  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              AndCmp, DAG.getBasicBlock(B.TargetBB));
+
+  // Set NextBlock to be the MBB immediately after the current one, if any.
+  // This is used to avoid emitting unnecessary branches to the next block.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  if (NextMBB == NextBlock)
+    DAG.setRoot(BrAnd);
+  else
+    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
+                            DAG.getBasicBlock(NextMBB)));
+}
+
+void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
+  // Retrieve successors: 0 is the normal return block, 1 the landing pad.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+  const Value *Callee(I.getCalledValue());
+  if (isa<InlineAsm>(Callee))
+    visitInlineAsm(&I);
+  else
+    LowerCallTo(&I, getValue(Callee), false, LandingPad);
+
+  // If the value of the invoke is used outside of its defining block, make it
+  // available as a virtual register.
+  CopyToExportRegsIfNeeded(&I);
+
+  // Update successor info
+  CurMBB->addSuccessor(Return);
+  CurMBB->addSuccessor(LandingPad);
+
+  // Drop into normal successor with an unconditional branch to Return.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitUnwind(UnwindInst &I) { // Emits no DAG nodes.
+}
+
+/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
+/// small case ranges).  Returns false if CR holds more than three cases.
+bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
+                                                 CaseRecVector& WorkList,
+                                                 Value* SV,
+                                                 MachineBasicBlock* Default) {
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Size is the number of Cases represented by this range.
+  size_t Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // TODO: If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once.  For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order.  See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->insert(BBI, FallThrough);
+
+      // Put SV in a virtual register to make it available from the new blocks.
+      ExportFromCurrentBlock(SV);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range :) containing exactly 1 case
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block.  Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == CurMBB)
+      visitSwitchCase(CB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return !DisableJumpTables && // Target must also lower BR_JT or BRIND.
+          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+  APInt LastExt(Last), FirstExt(First); // Copies to be widened by one bit.
+  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+  return (LastExt - FirstExt + 1ULL); // Inclusive span: Last - First + 1.
+}
+
+/// handleJTSwitchCase - Try to emit a jump table for CR; false if unsuitable.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+                                             CaseRecVector& WorkList,
+                                             Value* SV,
+                                             MachineBasicBlock* Default) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
+
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
+    return false; // No jump tables allowed, or TSize < 4.
+
+  APInt Range = ComputeRange(First, Last);
+  double Density = TSize.roundToDouble() / Range.roundToDouble();
+  if (Density < 0.4)
+    return false; // Density (TSize / Range) is below 0.4.
+
+  DEBUG(errs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range
+               << "Size: " << TSize << ". Density: " << Density << "\n\n");
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it.  Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+  CurMF->insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table. If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector, otherwise, push
+  // the default BB.
+  std::vector<MachineBasicBlock*> DestBBs;
+  APInt TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& High = cast<ConstantInt>(I->High)->getValue();
+
+    if (Low.sle(TEI) && TEI.sle(High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI==High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
+
+  // Update successor info. Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+
+  // Create a jump table index for this jump table, or return an existing
+  // one.
+  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
+
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock.  The header is emitted now only if CR.CaseBB is CurMBB.
+  JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
+  if (CR.CaseBB == CurMBB)
+    visitJumpTableHeader(JT, JTH);
+
+  JTCases.push_back(JumpTableBlock(JTH, JT));
+
+  return true;
+}
+
+/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
+/// 2 subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  Value* SV,
+                                                  MachineBasicBlock* Default) {
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
+  // (heuristically) allow us to emit JumpTable's later.
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  APInt LSize = FrontCase.size();
+  APInt RSize = TSize-LSize;
+  DEBUG(errs() << "Selecting best pivot: \n"
+               << "First: " << First << ", Last: " << Last <<'\n'
+               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+    APInt Range = ComputeRange(LEnd, RBegin);
+    assert((Range - 2ULL).isNonNegative() &&
+           "Invalid case distance");
+    double LDensity = (double)LSize.roundToDouble() / 
+                           (LEnd - First + 1ULL).roundToDouble();
+    double RDensity = (double)RSize.roundToDouble() /
+                           (Last - RBegin + 1ULL).roundToDouble();
+    double Metric = Range.logBase2()*(LDensity+RDensity);
+    // Should always split in some non-trivial place
+    DEBUG(errs() <<"=>Step\n"
+                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+                 << "LDensity: " << LDensity
+                 << ", RDensity: " << RDensity << '\n'
+                 << "Metric: " << Metric << '\n');
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C.  We use this to optimize our binary
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getValue() ==
+      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to the RHS node.
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == CurMBB)
+    visitSwitchCase(CB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - if current case range has few destinations and
+/// its span is less than the pointer type's bit width, encode the case range
+/// into a series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   Value* SV,
+                                                   MachineBasicBlock* Default){
+  EVT PTy = TLI.getPointerTy();
+  unsigned IntPtrBits = PTy.getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB.  This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // If target does not have legal shift left, do not emit bit tests at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // Single case counts one, case range - two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many destinations
+      return false;
+  }
+  DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
+               << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(errs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a
+  // word without having to subtract minValue. In this case,
+  // we can optimize away the subtraction.
+  if (minValue.isNonNegative() &&
+      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too much destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |=  1ULL << j;
+      CasesBits[i].Bits++;
+    }
+
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DEBUG(errs() << "Cases:\n");
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+                 << ", Bits: " << CasesBits[i].Bits
+                 << ", BB: " << CasesBits[i].BB << '\n');
+
+    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, CaseBB);
+    BTC.push_back(BitTestCase(CasesBits[i].Mask,
+                              CaseBB,
+                              CasesBits[i].BB));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  BitTestBlock BTB(lowBound, cmpRange, SV,
+                   -1U, (CR.CaseBB == CurMBB),
+                   CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == CurMBB)
+    visitBitTestHeader(BTB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+
+/// Clusterify - Merge adjacent Cases into ranges; returns the compare count.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+                                       const SwitchInst& SI) {
+  size_t numCmps = 0;
+
+  // Start with "simple" cases: one single-value Case per successor from 1 on.
+  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it
+    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+/// visitSwitch - Lower a switch by splitting its cases across a worklist.
+void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
+  // Figure out which block is immediately after the current one, if any.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CurMBB;
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // With only a default destination, branch to it unless it is next.
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges, then emit the branch unless falling through.
+    CurMBB->addSuccessor(Default);
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Default)));
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  size_t numCmps = Clusterify(Cases, SI);
+  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << '\n');
+  numCmps = 0;
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // If the range has few cases (three or fewer) emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
+      continue;
+
+    // If the range has at least 4 cases, is at least 40% dense, and jump
+    // tables are allowed for the target, then emit a jump table rather than
+    // lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default))
+      continue;
+
+    // Emit binary tree. We need to pick a pivot, and push left and right ranges
+    // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
+  }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
+  // Update machine-CFG edges: add every successor of I to CurMBB.
+  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+    CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
+
+  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          getValue(I.getAddress())));
+}
+
+
+void SelectionDAGBuilder::visitFSub(User &I) {
+  // -0.0 - X --> fneg, for a splat-vector or scalar constant -0.0 operand.
+  const Type *Ty = I.getType();
+  if (isa<VectorType>(Ty)) {
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
+      const VectorType *DestTy = cast<VectorType>(I.getType());
+      const Type *ElTy = DestTy->getElementType();
+      unsigned VL = DestTy->getNumElements();
+      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
+      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
+      if (CV == CNZ) {
+        SDValue Op2 = getValue(I.getOperand(1));
+        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                                 Op2.getValueType(), Op2));
+        return;
+      }
+    }
+  }
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
+    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
+      SDValue Op2 = getValue(I.getOperand(1));
+      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                               Op2.getValueType(), Op2));
+      return;
+    }
+  // No fold applied: lower as an ordinary FSUB.
+  visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  // Build the OpCode node; its result type is taken from the first operand.
+  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  if (!isa<VectorType>(I.getType()) &&
+      Op2.getValueType() != TLI.getShiftAmountTy()) {
+    // If the shift amount is narrower than the shift count type, promote it.
+    EVT PTy = TLI.getPointerTy();
+    EVT STy = TLI.getShiftAmountTy();
+    if (STy.bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // If the operand is larger than the shift count type but the shift
+    // count type has enough bits to represent any shift value, truncate
+    // it now. This is a common case and it exposes the truncate to
+    // optimization early.
+    else if (STy.getSizeInBits() >=
+             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getShiftAmountTy(), Op2);
+    // Otherwise we'll need to temporarily settle for some other
+    // convenient type; type legalization will make adjustments as
+    // needed.
+    else if (PTy.bitsLT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                        TLI.getPointerTy(), Op2);
+    else if (PTy.bitsGT(Op2.getValueType()))
+      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
+                        TLI.getPointerTy(), Op2);
+  }
+  // Build the shift; its result type is that of the shifted operand (Op1).
+  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(User &I) {
+  // The comparison is either an ICmpInst or a constant expression; take the
+  // predicate from whichever it is (it stays BAD_ICMP_PREDICATE otherwise).
+  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+  if (ICmpInst *Inst = dyn_cast<ICmpInst>(&I))
+    Pred = Inst->getPredicate();
+  else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(&I))
+    Pred = ICmpInst::Predicate(CE->getPredicate());
+  SDValue LHS = getValue(I.getOperand(0));
+  SDValue RHS = getValue(I.getOperand(1));
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), TLI.getValueType(I.getType()),
+                            LHS, RHS, getICmpCondCode(Pred)));
+}
+
+void SelectionDAGBuilder::visitFCmp(User &I) {
+  // The predicate comes from an FCmpInst or from a constant expression.
+  FCmpInst::Predicate Pred = FCmpInst::BAD_FCMP_PREDICATE;
+  if (FCmpInst *Inst = dyn_cast<FCmpInst>(&I))
+    Pred = Inst->getPredicate();
+  else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(&I))
+    Pred = FCmpInst::Predicate(CE->getPredicate());
+  SDValue LHS = getValue(I.getOperand(0));
+  SDValue RHS = getValue(I.getOperand(1));
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), TLI.getValueType(I.getType()),
+                            LHS, RHS, getFCmpCondCode(Pred)));
+}
+
+void SelectionDAGBuilder::visitSelect(User &I) {
+  // Lower a (possibly aggregate) select as one ISD::SELECT per value.
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+  SmallVector<SDValue, 4> Values(NumValues);
+  SDValue Cond     = getValue(I.getOperand(0));
+  SDValue TrueVal  = getValue(I.getOperand(1));
+  SDValue FalseVal = getValue(I.getOperand(2));
+  // Each value is selected in its own type; using TrueVal.getValueType() for
+  // all of them would mistype every member that differs from the first.
+  for (unsigned i = 0; i != NumValues; ++i)
+    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), ValueVTs[i], Cond,
+                            SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
+                            SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+
+void SelectionDAGBuilder::visitTrunc(User &I) {
+  // A trunc always narrows (src is wider than dest), so it is never a no-op.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitZExt(User &I) {
+  // A zext always widens (src is narrower than dest), so it can be neither a
+  // no-op nor a cast to bool; just emit ZERO_EXTEND.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitSExt(User &I) {
+  // An sext always widens (src is narrower than dest), so it can be neither a
+  // no-op nor a cast to bool; just emit SIGN_EXTEND.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(User &I) {
+  // An fptrunc is never a no-op.  FP_ROUND's extra operand is passed as 0.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  SDValue Flag = DAG.getIntPtrConstant(0);
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), ResVT, Src, Flag));
+}
+
+void SelectionDAGBuilder::visitFPExt(User &I) {
+  // An fpext is never a no-op cast, so no check is needed.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitFPToUI(User &I) {
+  // An fptoui is never a no-op cast, so no check is needed.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitFPToSI(User &I) {
+  // An fptosi is never a no-op cast, so no check is needed.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitUIToFP(User &I) {
+  // A uitofp is never a no-op cast, so no check is needed.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitSIToFP(User &I) {
+  // An sitofp is never a no-op cast, so no check is needed.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), ResVT, Src));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(User &I) {
+  // What to do depends on the size of the integer and the size of the
+  // pointer: truncate, zero extend, or nothing at all.  getZExtOrTrunc is
+  // handed the operand and the destination type and selects among these, so
+  // the source type does not need to be fetched here.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(User &I) {
+  // What to do depends on the size of the integer and the size of the
+  // pointer: truncate, zero extend, or nothing at all.  getZExtOrTrunc is
+  // handed the operand and the destination type and selects among these.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(User &I) {
+  // Source and destination of a bitcast have the same size, so the lowering
+  // is either a BIT_CONVERT or, when the value types already match, nothing.
+  SDValue Src = getValue(I.getOperand(0));
+  EVT ResVT = TLI.getValueType(I.getType());
+  if (ResVT == Src.getValueType()) {
+    setValue(&I, Src);  // No-op cast: reuse the operand's value.
+    return;
+  }
+  SDValue Conv = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), ResVT, Src);
+  setValue(&I, Conv);
+}
+
+void SelectionDAGBuilder::visitInsertElement(User &I) {
+  // Operands are (vector, scalar, index); the index is zero-extended to the
+  // pointer type before it is handed to INSERT_VECTOR_ELT.
+  SDValue Vec = getValue(I.getOperand(0));
+  SDValue Elt = getValue(I.getOperand(1));
+  SDValue RawIdx = getValue(I.getOperand(2));
+  SDValue Idx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                            TLI.getPointerTy(), RawIdx);
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), Vec, Elt, Idx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(User &I) {
+  SDValue Vec = getValue(I.getOperand(0));
+  SDValue RawIdx = getValue(I.getOperand(1));  // Zero-extended just below.
+  SDValue Idx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                            TLI.getPointerTy(), RawIdx);
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), Vec, Idx));
+}
+
+
+// SequentialMask - Utility for visitShuffleVector.  Returns true if the mask
+// counts up sequentially from SIndx, i.e. every non-negative entry Mask[i]
+// equals i + SIndx.  Negative (undef) entries are allowed anywhere.
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+  for (unsigned Pos = 0, End = Mask.size(); Pos != End; ++Pos)
+    if (Mask[Pos] >= 0 && Mask[Pos] != (int)(Pos + SIndx))
+      return false;
+  return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(User &I) {
+  SmallVector<int, 8> Mask;  // Shuffle mask as ints; -1 marks an undef lane.
+  SDValue Src1 = getValue(I.getOperand(0));
+  SDValue Src2 = getValue(I.getOperand(1));
+
+  // Convert the ConstantVector mask operand into an array of ints, with -1
+  // representing undef values.
+  SmallVector<Constant*, 8> MaskElts;
+  cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), 
+                                                     MaskElts);
+  unsigned MaskNumElts = MaskElts.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (isa<UndefValue>(MaskElts[i]))
+      Mask.push_back(-1);
+    else
+      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+  }
+  
+  EVT VT = TLI.getValueType(I.getType());
+  EVT SrcVT = Src1.getValueType();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+  // Mask and source have the same length: emit the shuffle directly.
+  if (SrcNumElts == MaskNumElts) {
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
+    return;
+  }
+
+  // Normalize the shuffle vector since mask and vector length don't match.
+  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // Mask is longer than the source vectors and is a multiple of the source
+    // vectors.  We can use concatenate vector to make the mask and vectors
+    // lengths match.
+    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+      // The shuffle is concatenating two vectors together.
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                               VT, Src1, Src2));
+      return;
+    }
+
+    // Pad both vectors with undefs to make them the same length as the mask.
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+    
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 
+                                                  getCurDebugLoc(), VT, 
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT, 
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust mask for new input vector length: indices into the second
+    // input move up by the amount of padding added to the first.
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+    }
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, 
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses in each of the two vectors.
+    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+      
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check whether the accessed range is smaller than the mask length and
+    // whether we can find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not Extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (int Input=0; Input < 2; ++Input) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+        // Fits within range but we should see if we can find a good
+        // start index that is a multiple of the mask length.
+        if (MaxRange[Input] < (int)MaskNumElts) {
+          RangeUse[Input] = 1; // Extract from beginning of the vector
+          StartIdx[Input] = 0;
+        } else {
+          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+              StartIdx[Input] + MaskNumElts < SrcNumElts)
+            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+        }
+      }
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
+      return;
+    }
+    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+      // Extract appropriate subvector and generate a vector shuffle
+      for (int Input=0; Input < 2; ++Input) {
+        SDValue& Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0) {
+          Src = DAG.getUNDEF(VT);
+        } else {
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+        }
+      }
+      // Calculate new mask: rebase each index onto its extracted subvector.
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+      }
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back to
+  // replacing the shuffle with one EXTRACT_VECTOR_ELT per defined mask entry
+  // and a BUILD_VECTOR that assembles the result.
+  EVT EltVT = VT.getVectorElementType();
+  EVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue,8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Mask[i] < 0) {
+      Ops.push_back(DAG.getUNDEF(EltVT));
+    } else {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                                  EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                                  EltVT, Src2,
+                                  DAG.getConstant(Idx - SrcNumElts, PtrVT)));
+    }
+  }
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                           VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
+  const Value *Op0 = I.getOperand(0);  // Aggregate being inserted into.
+  const Value *Op1 = I.getOperand(1);  // Value being inserted.
+  const Type *AggTy = I.getType();
+  const Type *ValTy = Op1->getType();
+  bool IntoUndef = isa<UndefValue>(Op0);
+  bool FromUndef = isa<UndefValue>(Op1);
+  // LinearIndex is the slot in Values where the inserted value(s) begin.
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<EVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumAggValues = AggValueVTs.size();
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumAggValues);
+  // Values is filled in three consecutive runs that share the index i.
+  SDValue Agg = getValue(Op0);
+  SDValue Val = getValue(Op1);
+  unsigned i = 0;
+  // Copy the beginning value(s) from the original aggregate.
+  for (; i != LinearIndex; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+  // Copy values from the inserted value(s).
+  for (; i != LinearIndex + NumValValues; ++i)
+    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  // Copy remaining value(s) from the original aggregate.
+  for (; i != NumAggValues; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
+                           &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
+  const Value *Op0 = I.getOperand(0);  // Aggregate being read from.
+  const Type *AggTy = Op0->getType();
+  const Type *ValTy = I.getType();
+  bool OutOfUndef = isa<UndefValue>(Op0);
+  // LinearIndex is the index of the first result of Agg to copy out.
+  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
+                                            I.idx_begin(), I.idx_end());
+
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumValValues);
+
+  SDValue Agg = getValue(Op0);
+  // Copy out the selected value(s); an undef aggregate yields undef of the
+  // corresponding result type instead.
+  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+    Values[i - LinearIndex] =
+      OutOfUndef ?
+        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+        SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValValueVTs[0], NumValValues),
+                           &Values[0], NumValValues));
+}
+
+
+void SelectionDAGBuilder::visitGetElementPtr(User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  const Type *Ty = I.getOperand(0)->getType();
+  // N accumulates the address; Ty tracks the type the next index applies to.
+  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        DAG.getIntPtrConstant(Offset));
+      }
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->getZExtValue() == 0) continue;
+        uint64_t Offs =
+            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        SDValue OffsVal;
+        EVT PTy = TLI.getPointerTy();
+        unsigned PtrBits = PTy.getSizeInBits();
+        if (PtrBits < 64) {
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                                TLI.getPointerTy(),
+                                DAG.getConstant(Offs, MVT::i64));
+        } else
+          OffsVal = DAG.getIntPtrConstant(Offs);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        OffsVal);
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+                                TD->getTypeAllocSize(Ty));
+      SDValue IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          unsigned Amt = ElementSize.logBase2();
+          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                             N.getValueType(), IdxN,
+                             DAG.getConstant(Amt, TLI.getPointerTy()));
+        } else {
+          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+                             N.getValueType(), IdxN, Scale);
+        }
+      }
+
+      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                      N.getValueType(), N, IdxN);
+    }
+  }
+  setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+  // Scale the array size by the type's size, in the array size's own type.
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+                          AllocSize,
+                          DAG.getConstant(TySize, AllocSize.getValueType()));
+  
+  
+  // Zero-extend or truncate the byte count to the pointer type.
+  EVT IntPtr = TLI.getPointerTy();
+  AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If the alignment is greater than
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
+  unsigned StackAlign =
+    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
+  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(StackAlign-1));
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+                            VTs, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
+}
+
+void SelectionDAGBuilder::visitLoad(LoadInst &I) {
+  const Value *SV = I.getOperand(0);
+  SDValue Ptr = getValue(SV);
+
+  const Type *Ty = I.getType();
+  bool isVolatile = I.isVolatile();
+  unsigned Alignment = I.getAlignment();
+  // One load is emitted per entry of ValueVTs, at the matching Offsets entry.
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  SDValue Root;
+  bool ConstantMemory = false;
+  if (I.isVolatile())
+    // Serialize volatile loads with other side effects.
+    Root = getRoot();
+  else if (AA->pointsToConstantMemory(SV)) {
+    // Do not serialize (non-volatile) loads of constant memory with anything.
+    Root = DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = DAG.getRoot();
+  }
+
+  SmallVector<SDValue, 4> Values(NumValues);
+  SmallVector<SDValue, 4> Chains(NumValues);
+  EVT PtrVT = Ptr.getValueType();
+  for (unsigned i = 0; i != NumValues; ++i) {
+    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+                            DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                                        PtrVT, Ptr,
+                                        DAG.getConstant(Offsets[i], PtrVT)),
+                            SV, Offsets[i], isVolatile, Alignment);
+    Values[i] = L;
+    Chains[i] = L.getValue(1);
+  }
+  // Volatile loads become the new root; other loads go to PendingLoads.
+  if (!ConstantMemory) {
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other,
+                                  &Chains[0], NumValues);
+    if (isVolatile)
+      DAG.setRoot(Chain);
+    else
+      PendingLoads.push_back(Chain);
+  }
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+
+void SelectionDAGBuilder::visitStore(StoreInst &I) {
+  Value *SrcV = I.getOperand(0);  // Value being stored.
+  Value *PtrV = I.getOperand(1);  // Address being stored to.
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  // Get the lowered operands. Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have values in the map.
+  SDValue Src = getValue(SrcV);
+  SDValue Ptr = getValue(PtrV);
+
+  SDValue Root = getRoot();
+  SmallVector<SDValue, 4> Chains(NumValues);
+  EVT PtrVT = Ptr.getValueType();
+  bool isVolatile = I.isVolatile();
+  unsigned Alignment = I.getAlignment();
+  for (unsigned i = 0; i != NumValues; ++i)
+    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
+                             SDValue(Src.getNode(), Src.getResNo() + i),
+                             DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                                         PtrVT, Ptr,
+                                         DAG.getConstant(Offsets[i], PtrVT)),
+                             PtrV, Offsets[i], isVolatile, Alignment);
+  // Join the per-value store chains into the new root.
+  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                          MVT::Other, &Chains[0], NumValues));
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node (or to a memory-intrinsic node when getTgtMemIntrinsic accepts it).
+void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
+                                               unsigned Intrinsic) {
+  bool HasChain = !I.doesNotAccessMemory();
+  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemIntrinsic
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand unless getTgtMemIntrinsic
+  // accepted the call.
+  if (!IsTgtIntrinsic)
+    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add the call's operands, starting at operand 1, to the operand list.
+  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
+    SDValue Op = getValue(I.getOperand(i));
+    assert(TLI.isTypeLegal(Op.getValueType()) &&
+           "Intrinsic uses a non-legal type?");
+    Ops.push_back(Op);
+  }
+
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+           "Intrinsic uses a non-legal type?");
+  }
+#endif // NDEBUG
+  if (HasChain)
+    ValueVTs.push_back(MVT::Other);
+
+  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+  // Create the node.
+  SDValue Result;
+  if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+                                     VTs, &Ops[0], Ops.size(),
+                                     Info.memVT, Info.ptrVal, Info.offset,
+                                     Info.align, Info.vol,
+                                     Info.readMem, Info.writeMem);
+  }
+  else if (!HasChain)
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  else if (I.getType() != Type::getVoidTy(*DAG.getContext()))
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  else
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+
+  if (HasChain) {  // The chain is the node's last result.
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+  if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      EVT VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+    }
+    setValue(&I, Result);
+  }
+}
+
+/// GetSignificand - Keep the low 23 bits of Op and OR in 0x3f800000 (the bit
+/// pattern of 1.0f), returning the result reinterpreted as an f32:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the hexadecimal representation of a floating point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+  SDValue MantissaMask = DAG.getConstant(0x007fffff, MVT::i32);
+  SDValue Mantissa = DAG.getNode(ISD::AND, dl, MVT::i32, Op, MantissaMask);
+  SDValue OneBits = DAG.getConstant(0x3f800000, MVT::i32);
+  SDValue Bits = DAG.getNode(ISD::OR, dl, MVT::i32, Mantissa, OneBits);
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Bits);
+}
+
+/// GetExponent - Get the exponent field of Op, minus 127, as an f32:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the hexadecimal representation of a floating point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl) {
+  SDValue ExpMask = DAG.getConstant(0x7f800000, MVT::i32);
+  SDValue ExpField = DAG.getNode(ISD::AND, dl, MVT::i32, Op, ExpMask);
+  SDValue ShAmt = DAG.getConstant(23, TLI.getPointerTy());
+  SDValue Biased = DAG.getNode(ISD::SRL, dl, MVT::i32, ExpField, ShAmt);
+  SDValue Bias = DAG.getConstant(127, MVT::i32);
+  SDValue Unbiased = DAG.getNode(ISD::SUB, dl, MVT::i32, Biased, Bias);
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Unbiased);
+}
+
+/// getF32Constant - Build an f32 constant node from the 32-bit pattern Flt.
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  APFloat FPVal(APInt(32, Flt));
+  return DAG.getConstantFP(FPVal, MVT::f32);
+}
+
+/// implVisitBinaryAtomic - Shared lowering for the binary-input atomic
+/// intrinsics handled by visitIntrinsicCall.  I is the call instruction and
+/// Op is the associated NodeType for I.  Always returns 0.
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+  SDValue Root = getRoot();
+  Value *PtrArg = I.getOperand(1);
+  SDValue Ptr = getValue(PtrArg);
+  SDValue Val = getValue(I.getOperand(2));
+  SDValue Atomic = DAG.getAtomic(Op, getCurDebugLoc(),
+                                 Val.getValueType().getSimpleVT(),
+                                 Root, Ptr, Val, PtrArg);
+  // The node itself is the call's value; its result 1 becomes the new root.
+  setValue(&I, Atomic);
+  DAG.setRoot(Atomic.getValue(1));
+  return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.  Emits node Op
+// on call operands 1 and 2 with two result types: the type of operand 1 and
+// i1.  Always returns 0.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+  SDValue LHS = getValue(I.getOperand(1));
+  SDValue RHS = getValue(I.getOperand(2));
+
+  SDValue Result = DAG.getNode(Op, getCurDebugLoc(),
+                               DAG.getVTList(LHS.getValueType(), MVT::i1),
+                               LHS, RHS);
+
+  setValue(&I, Result);
+  return 0;
+}
+
+/// visitExp - Lower an exp intrinsic.  For an f32 operand in limited-precision
+/// mode (0 < LimitFloatPrecision <= 18) the call is expanded inline into a
+/// polynomial approximation; otherwise an ISD::FEXP node is emitted.
+void
+SelectionDAGBuilder::visitExp(CallInst &I) {
+  // Polynomial coefficients for TwoToFractionalPartOfX, stored as f32 bit
+  // patterns with the highest-order coefficient first.
+  //
+  // For floating-point precision of 6:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.997535578f +
+  //       (0.735607626f + 0.252464424f * x) * x;
+  //
+  // error 0.0144103317, which is 6 bits
+  static const unsigned Poly6[] = {
+    0x3e814304, 0x3f3c50c8, 0x3f7f5e7e
+  };
+
+  // For floating-point precision of 12:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.999892986f +
+  //       (0.696457318f +
+  //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+  //
+  // 0.000107046256 error, which is 13 to 14 bits
+  static const unsigned Poly12[] = {
+    0x3da235e3, 0x3e65b8f3, 0x3f324b07, 0x3f7ff8fd
+  };
+
+  // For floating-point precision of 18:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.999999982f +
+  //       (0.693148872f +
+  //         (0.240227044f +
+  //           (0.554906021e-1f +
+  //             (0.961591928e-2f +
+  //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+  //
+  // error 2.47208000*10^(-7), which is better than 18 bits
+  static const unsigned Poly18[] = {
+    0x3924b03e, 0x3ab24b87, 0x3c1d8c17, 0x3d634a1d,
+    0x3e75fe14, 0x3f317234, 0x3f800000
+  };
+
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Arg = getValue(I.getOperand(1));
+
+  if (Arg.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OFe 1.4426950f
+    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+    SDValue Scaled = DAG.getNode(ISD::FMUL, dl, MVT::f32, Arg,
+                                 getF32Constant(DAG, 0x3fb8aa3b));
+    SDValue IntPart = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Scaled);
+
+    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+    SDValue IntPartFP = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntPart);
+    SDValue Frac = DAG.getNode(ISD::FSUB, dl, MVT::f32, Scaled, IntPartFP);
+
+    //   IntegerPartOfX <<= 23;
+    SDValue ExpBits = DAG.getNode(ISD::SHL, dl, MVT::i32, IntPart,
+                                  DAG.getConstant(23, TLI.getPointerTy()));
+
+    // Pick the coefficient table matching the requested precision.
+    const unsigned *Coeffs;
+    unsigned NumCoeffs;
+    if (LimitFloatPrecision <= 6) {
+      Coeffs = Poly6;
+      NumCoeffs = sizeof(Poly6) / sizeof(Poly6[0]);
+    } else if (LimitFloatPrecision <= 12) {
+      Coeffs = Poly12;
+      NumCoeffs = sizeof(Poly12) / sizeof(Poly12[0]);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      Coeffs = Poly18;
+      NumCoeffs = sizeof(Poly18) / sizeof(Poly18[0]);
+    }
+
+    // Horner evaluation: start with x * c0, then alternately add the next
+    // coefficient and multiply by x, finishing on an add.
+    SDValue Poly = DAG.getNode(ISD::FMUL, dl, MVT::f32, Frac,
+                               getF32Constant(DAG, Coeffs[0]));
+    for (unsigned i = 1; i != NumCoeffs; ++i) {
+      Poly = DAG.getNode(ISD::FADD, dl, MVT::f32, Poly,
+                         getF32Constant(DAG, Coeffs[i]));
+      if (i + 1 != NumCoeffs)
+        Poly = DAG.getNode(ISD::FMUL, dl, MVT::f32, Poly, Frac);
+    }
+
+    // Add the exponent into the result in integer domain.
+    SDValue PolyBits = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Poly);
+    SDValue Sum = DAG.getNode(ISD::ADD, dl, MVT::i32, PolyBits, ExpBits);
+
+    result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Sum);
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FEXP, dl, Arg.getValueType(), Arg);
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog - Lower a log intrinsic.  For an f32 operand in limited-precision
+/// mode (0 < LimitFloatPrecision <= 18) the call is expanded inline as
+///
+///   GetExponent(x) * log(2) + LogOfMantissa(GetSignificand(x))
+///
+/// with a polynomial for LogOfMantissa; otherwise an ISD::FLOG node is
+/// emitted.
+void
+SelectionDAGBuilder::visitLog(CallInst &I) {
+  // One step of the polynomial evaluation: Opcode combines the running value
+  // with the f32 constant whose bit pattern is Bits.  Entry 0 of each table
+  // multiplies x by the highest-order coefficient.
+  struct HornerStep { unsigned Opcode; unsigned Bits; };
+
+  // For floating-point precision of 6:
+  //
+  //   LogofMantissa =
+  //     -1.1609546f +
+  //       (1.4034025f - 0.23903021f * x) * x;
+  //
+  // error 0.0034276066, which is better than 8 bits
+  static const HornerStep Steps6[] = {
+    { ISD::FMUL, 0xbe74c456 },
+    { ISD::FADD, 0x3fb3a2b1 },
+    { ISD::FSUB, 0x3f949a29 }
+  };
+
+  // For floating-point precision of 12:
+  //
+  //   LogOfMantissa =
+  //     -1.7417939f +
+  //       (2.8212026f +
+  //         (-1.4699568f +
+  //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+  //
+  // error 0.000061011436, which is 14 bits
+  static const HornerStep Steps12[] = {
+    { ISD::FMUL, 0xbd67b6d6 },
+    { ISD::FADD, 0x3ee4f4b8 },
+    { ISD::FSUB, 0x3fbc278b },
+    { ISD::FADD, 0x40348e95 },
+    { ISD::FSUB, 0x3fdef31a }
+  };
+
+  // For floating-point precision of 18:
+  //
+  //   LogOfMantissa =
+  //     -2.1072184f +
+  //       (4.2372794f +
+  //         (-3.7029485f +
+  //           (2.2781945f +
+  //             (-0.87823314f +
+  //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+  //
+  // error 0.0000023660568, which is better than 18 bits
+  static const HornerStep Steps18[] = {
+    { ISD::FMUL, 0xbc91e5ac },
+    { ISD::FADD, 0x3e4350aa },
+    { ISD::FSUB, 0x3f60d3e3 },
+    { ISD::FADD, 0x4011cdf0 },
+    { ISD::FSUB, 0x406cfd1c },
+    { ISD::FADD, 0x408797cb },
+    { ISD::FSUB, 0x4006dcab }
+  };
+
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Arg = getValue(I.getOperand(1));
+
+  if (Arg.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue ArgBits = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+
+    // Scale the exponent by log(2) [0.69314718f].
+    SDValue Exp = GetExponent(DAG, ArgBits, TLI, dl);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3f317218));
+
+    // Get the significand and build it into a floating-point number.
+    SDValue X = GetSignificand(DAG, ArgBits, dl);
+
+    // Pick the step table matching the requested precision.
+    const HornerStep *Steps;
+    unsigned NumSteps;
+    if (LimitFloatPrecision <= 6) {
+      Steps = Steps6;
+      NumSteps = sizeof(Steps6) / sizeof(Steps6[0]);
+    } else if (LimitFloatPrecision <= 12) {
+      Steps = Steps12;
+      NumSteps = sizeof(Steps12) / sizeof(Steps12[0]);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      Steps = Steps18;
+      NumSteps = sizeof(Steps18) / sizeof(Steps18[0]);
+    }
+
+    // Horner evaluation: x * c0, then alternately combine with the next
+    // constant and multiply by x, finishing on the constant term.
+    SDValue LogOfMantissa =
+      DAG.getNode(Steps[0].Opcode, dl, MVT::f32, X,
+                  getF32Constant(DAG, Steps[0].Bits));
+    for (unsigned i = 1; i != NumSteps; ++i) {
+      LogOfMantissa = DAG.getNode(Steps[i].Opcode, dl, MVT::f32, LogOfMantissa,
+                                  getF32Constant(DAG, Steps[i].Bits));
+      if (i + 1 != NumSteps)
+        LogOfMantissa = DAG.getNode(ISD::FMUL, dl, MVT::f32,
+                                    LogOfMantissa, X);
+    }
+
+    result = DAG.getNode(ISD::FADD, dl,
+                         MVT::f32, LogOfExponent, LogOfMantissa);
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG, dl, Arg.getValueType(), Arg);
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog2 - Lower a log2 intrinsic.  For an f32 operand in
+/// limited-precision mode (0 < LimitFloatPrecision <= 18) the call is expanded
+/// inline as
+///
+///   GetExponent(x) + Log2ofMantissa(GetSignificand(x))
+///
+/// with a polynomial for Log2ofMantissa; otherwise an ISD::FLOG2 node is
+/// emitted.
+void
+SelectionDAGBuilder::visitLog2(CallInst &I) {
+  // One step of the polynomial evaluation: Opcode combines the running value
+  // with the f32 constant whose bit pattern is Bits.  Entry 0 of each table
+  // multiplies x by the highest-order coefficient.
+  struct HornerStep { unsigned Opcode; unsigned Bits; };
+
+  // Different possible minimax approximations of significand in
+  // floating-point for various degrees of accuracy over [1,2].
+  //
+  // For floating-point precision of 6:
+  //
+  //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+  //
+  // error 0.0049451742, which is more than 7 bits
+  static const HornerStep Steps6[] = {
+    { ISD::FMUL, 0xbeb08fe0 },
+    { ISD::FADD, 0x40019463 },
+    { ISD::FSUB, 0x3fd6633d }
+  };
+
+  // For floating-point precision of 12:
+  //
+  //   Log2ofMantissa =
+  //     -2.51285454f +
+  //       (4.07009056f +
+  //         (-2.12067489f +
+  //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+  //
+  // error 0.0000876136000, which is better than 13 bits
+  static const HornerStep Steps12[] = {
+    { ISD::FMUL, 0xbda7262e },
+    { ISD::FADD, 0x3f25280b },
+    { ISD::FSUB, 0x4007b923 },
+    { ISD::FADD, 0x40823e2f },
+    { ISD::FSUB, 0x4020d29c }
+  };
+
+  // For floating-point precision of 18:
+  //
+  //   Log2ofMantissa =
+  //     -3.0400495f +
+  //       (6.1129976f +
+  //         (-5.3420409f +
+  //           (3.2865683f +
+  //             (-1.2669343f +
+  //               (0.27515199f -
+  //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+  //
+  // error 0.0000018516, which is better than 18 bits
+  static const HornerStep Steps18[] = {
+    { ISD::FMUL, 0xbcd2769e },
+    { ISD::FADD, 0x3e8ce0b9 },
+    { ISD::FSUB, 0x3fa22ae7 },
+    { ISD::FADD, 0x40525723 },
+    { ISD::FSUB, 0x40aaf200 },
+    { ISD::FADD, 0x40c39dad },
+    { ISD::FSUB, 0x4042902c }
+  };
+
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Arg = getValue(I.getOperand(1));
+
+  if (Arg.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue ArgBits = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+
+    // Get the exponent.
+    SDValue LogOfExponent = GetExponent(DAG, ArgBits, TLI, dl);
+
+    // Get the significand and build it into a floating-point number.
+    SDValue X = GetSignificand(DAG, ArgBits, dl);
+
+    // Pick the step table matching the requested precision.
+    const HornerStep *Steps;
+    unsigned NumSteps;
+    if (LimitFloatPrecision <= 6) {
+      Steps = Steps6;
+      NumSteps = sizeof(Steps6) / sizeof(Steps6[0]);
+    } else if (LimitFloatPrecision <= 12) {
+      Steps = Steps12;
+      NumSteps = sizeof(Steps12) / sizeof(Steps12[0]);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      Steps = Steps18;
+      NumSteps = sizeof(Steps18) / sizeof(Steps18[0]);
+    }
+
+    // Horner evaluation: x * c0, then alternately combine with the next
+    // constant and multiply by x, finishing on the constant term.
+    SDValue Log2ofMantissa =
+      DAG.getNode(Steps[0].Opcode, dl, MVT::f32, X,
+                  getF32Constant(DAG, Steps[0].Bits));
+    for (unsigned i = 1; i != NumSteps; ++i) {
+      Log2ofMantissa = DAG.getNode(Steps[i].Opcode, dl, MVT::f32,
+                                   Log2ofMantissa,
+                                   getF32Constant(DAG, Steps[i].Bits));
+      if (i + 1 != NumSteps)
+        Log2ofMantissa = DAG.getNode(ISD::FMUL, dl, MVT::f32,
+                                     Log2ofMantissa, X);
+    }
+
+    result = DAG.getNode(ISD::FADD, dl,
+                         MVT::f32, LogOfExponent, Log2ofMantissa);
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG2, dl, Arg.getValueType(), Arg);
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic.  For an f32 operand in
+/// limited-precision mode (0 < LimitFloatPrecision <= 18) the call is expanded
+/// inline as
+///
+///   GetExponent(x) * log10(2) + Log10ofMantissa(GetSignificand(x))
+///
+/// with a polynomial for Log10ofMantissa; otherwise an ISD::FLOG10 node is
+/// emitted.
+void
+SelectionDAGBuilder::visitLog10(CallInst &I) {
+  // One step of the polynomial evaluation: Opcode combines the running value
+  // with the f32 constant whose bit pattern is Bits.  Entry 0 of each table
+  // multiplies x by the highest-order coefficient.
+  struct HornerStep { unsigned Opcode; unsigned Bits; };
+
+  // For floating-point precision of 6:
+  //
+  //   Log10ofMantissa =
+  //     -0.50419619f +
+  //       (0.60948995f - 0.10380950f * x) * x;
+  //
+  // error 0.0014886165, which is 6 bits
+  static const HornerStep Steps6[] = {
+    { ISD::FMUL, 0xbdd49a13 },
+    { ISD::FADD, 0x3f1c0789 },
+    { ISD::FSUB, 0x3f011300 }
+  };
+
+  // For floating-point precision of 12:
+  //
+  //   Log10ofMantissa =
+  //     -0.64831180f +
+  //       (0.91751397f +
+  //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+  //
+  // error 0.00019228036, which is better than 12 bits
+  static const HornerStep Steps12[] = {
+    { ISD::FMUL, 0x3d431f31 },
+    { ISD::FSUB, 0x3ea21fb2 },
+    { ISD::FADD, 0x3f6ae232 },
+    { ISD::FSUB, 0x3f25f7c3 }
+  };
+
+  // For floating-point precision of 18:
+  //
+  //   Log10ofMantissa =
+  //     -0.84299375f +
+  //       (1.5327582f +
+  //         (-1.0688956f +
+  //           (0.49102474f +
+  //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+  //
+  // error 0.0000037995730, which is better than 18 bits
+  static const HornerStep Steps18[] = {
+    { ISD::FMUL, 0x3c5d51ce },
+    { ISD::FSUB, 0x3e00685a },
+    { ISD::FADD, 0x3efb6798 },
+    { ISD::FSUB, 0x3f88d192 },
+    { ISD::FADD, 0x3fc4316c },
+    { ISD::FSUB, 0x3f57ce70 }
+  };
+
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Arg = getValue(I.getOperand(1));
+
+  if (Arg.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue ArgBits = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+
+    // Scale the exponent by log10(2) [0.30102999f].
+    SDValue Exp = GetExponent(DAG, ArgBits, TLI, dl);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3e9a209a));
+
+    // Get the significand and build it into a floating-point number.
+    SDValue X = GetSignificand(DAG, ArgBits, dl);
+
+    // Pick the step table matching the requested precision.
+    const HornerStep *Steps;
+    unsigned NumSteps;
+    if (LimitFloatPrecision <= 6) {
+      Steps = Steps6;
+      NumSteps = sizeof(Steps6) / sizeof(Steps6[0]);
+    } else if (LimitFloatPrecision <= 12) {
+      Steps = Steps12;
+      NumSteps = sizeof(Steps12) / sizeof(Steps12[0]);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      Steps = Steps18;
+      NumSteps = sizeof(Steps18) / sizeof(Steps18[0]);
+    }
+
+    // Horner evaluation: x * c0, then alternately combine with the next
+    // constant and multiply by x, finishing on the constant term.
+    SDValue Log10ofMantissa =
+      DAG.getNode(Steps[0].Opcode, dl, MVT::f32, X,
+                  getF32Constant(DAG, Steps[0].Bits));
+    for (unsigned i = 1; i != NumSteps; ++i) {
+      Log10ofMantissa = DAG.getNode(Steps[i].Opcode, dl, MVT::f32,
+                                    Log10ofMantissa,
+                                    getF32Constant(DAG, Steps[i].Bits));
+      if (i + 1 != NumSteps)
+        Log10ofMantissa = DAG.getNode(ISD::FMUL, dl, MVT::f32,
+                                      Log10ofMantissa, X);
+    }
+
+    result = DAG.getNode(ISD::FADD, dl,
+                         MVT::f32, LogOfExponent, Log10ofMantissa);
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG10, dl, Arg.getValueType(), Arg);
+  }
+
+  setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic.  For an f32 operand in
+/// limited-precision mode (0 < LimitFloatPrecision <= 18) the call is expanded
+/// inline into a polynomial approximation; otherwise an ISD::FEXP2 node is
+/// emitted.
+void
+SelectionDAGBuilder::visitExp2(CallInst &I) {
+  // Polynomial coefficients for TwoToFractionalPartOfX, stored as f32 bit
+  // patterns with the highest-order coefficient first.
+  //
+  // For floating-point precision of 6:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.997535578f +
+  //       (0.735607626f + 0.252464424f * x) * x;
+  //
+  // error 0.0144103317, which is 6 bits
+  static const unsigned Poly6[] = {
+    0x3e814304, 0x3f3c50c8, 0x3f7f5e7e
+  };
+
+  // For floating-point precision of 12:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.999892986f +
+  //       (0.696457318f +
+  //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+  //
+  // error 0.000107046256, which is 13 to 14 bits
+  static const unsigned Poly12[] = {
+    0x3da235e3, 0x3e65b8f3, 0x3f324b07, 0x3f7ff8fd
+  };
+
+  // For floating-point precision of 18:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.999999982f +
+  //       (0.693148872f +
+  //         (0.240227044f +
+  //           (0.554906021e-1f +
+  //             (0.961591928e-2f +
+  //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+  // error 2.47208000*10^(-7), which is better than 18 bits
+  static const unsigned Poly18[] = {
+    0x3924b03e, 0x3ab24b87, 0x3c1d8c17, 0x3d634a1d,
+    0x3e75fe14, 0x3f317234, 0x3f800000
+  };
+
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Arg = getValue(I.getOperand(1));
+
+  if (Arg.getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue IntPart = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Arg);
+
+    //   FractionalPartOfX = x - (float)IntegerPartOfX;
+    SDValue IntPartFP = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntPart);
+    SDValue Frac = DAG.getNode(ISD::FSUB, dl, MVT::f32, Arg, IntPartFP);
+
+    //   IntegerPartOfX <<= 23;
+    SDValue ExpBits = DAG.getNode(ISD::SHL, dl, MVT::i32, IntPart,
+                                  DAG.getConstant(23, TLI.getPointerTy()));
+
+    // Pick the coefficient table matching the requested precision.
+    const unsigned *Coeffs;
+    unsigned NumCoeffs;
+    if (LimitFloatPrecision <= 6) {
+      Coeffs = Poly6;
+      NumCoeffs = sizeof(Poly6) / sizeof(Poly6[0]);
+    } else if (LimitFloatPrecision <= 12) {
+      Coeffs = Poly12;
+      NumCoeffs = sizeof(Poly12) / sizeof(Poly12[0]);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      Coeffs = Poly18;
+      NumCoeffs = sizeof(Poly18) / sizeof(Poly18[0]);
+    }
+
+    // Horner evaluation: start with x * c0, then alternately add the next
+    // coefficient and multiply by x, finishing on an add.
+    SDValue Poly = DAG.getNode(ISD::FMUL, dl, MVT::f32, Frac,
+                               getF32Constant(DAG, Coeffs[0]));
+    for (unsigned i = 1; i != NumCoeffs; ++i) {
+      Poly = DAG.getNode(ISD::FADD, dl, MVT::f32, Poly,
+                         getF32Constant(DAG, Coeffs[i]));
+      if (i + 1 != NumCoeffs)
+        Poly = DAG.getNode(ISD::FMUL, dl, MVT::f32, Poly, Frac);
+    }
+
+    // Add the shifted integer part to the polynomial's bits in the integer
+    // domain, then view the sum as a float again.
+    SDValue PolyBits = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Poly);
+    SDValue TwoToFractionalPartOfX =
+      DAG.getNode(ISD::ADD, dl, MVT::i32, PolyBits, ExpBits);
+
+    result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                         MVT::f32, TwoToFractionalPartOfX);
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FEXP2, dl, Arg.getValueType(), Arg);
+  }
+
+  setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic.  When both operands are f32, limited-
+/// precision mode is active (0 < LimitFloatPrecision <= 18) and the first
+/// operand is the constant 10.0f, the call is expanded inline into a
+/// polynomial approximation of 2^(x * log2(10)), where x is the second
+/// operand; otherwise an ISD::FPOW node is emitted.
+void
+SelectionDAGBuilder::visitPow(CallInst &I) {
+  // Polynomial coefficients for TwoToFractionalPartOfX, stored as f32 bit
+  // patterns with the highest-order coefficient first.
+  //
+  // For floating-point precision of 6:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.997535578f +
+  //       (0.735607626f + 0.252464424f * x) * x;
+  //
+  // error 0.0144103317, which is 6 bits
+  static const unsigned Poly6[] = {
+    0x3e814304, 0x3f3c50c8, 0x3f7f5e7e
+  };
+
+  // For floating-point precision of 12:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.999892986f +
+  //       (0.696457318f +
+  //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+  //
+  // error 0.000107046256, which is 13 to 14 bits
+  static const unsigned Poly12[] = {
+    0x3da235e3, 0x3e65b8f3, 0x3f324b07, 0x3f7ff8fd
+  };
+
+  // For floating-point precision of 18:
+  //
+  //   TwoToFractionalPartOfX =
+  //     0.999999982f +
+  //       (0.693148872f +
+  //         (0.240227044f +
+  //           (0.554906021e-1f +
+  //             (0.961591928e-2f +
+  //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+  // error 2.47208000*10^(-7), which is better than 18 bits
+  static const unsigned Poly18[] = {
+    0x3924b03e, 0x3ab24b87, 0x3c1d8c17, 0x3d634a1d,
+    0x3e75fe14, 0x3f317234, 0x3f800000
+  };
+
+  SDValue result;
+  Value *Val = I.getOperand(1);
+  DebugLoc dl = getCurDebugLoc();
+  bool IsExp10 = false;
+
+  // IsExp10 can only become true inside this precision check, so the
+  // expansion below does not need to test LimitFloatPrecision's range again.
+  if (getValue(Val).getValueType() == MVT::f32 &&
+      getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val)) {
+      APFloat Ten(10.0f);
+      IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+    }
+  }
+
+  if (IsExp10) {
+    SDValue Op = getValue(I.getOperand(2));
+
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OF10 3.3219281f
+    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
+    SDValue Scaled = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+                                 getF32Constant(DAG, 0x40549a78));
+    SDValue IntPart = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Scaled);
+
+    //   FractionalPartOfX = (x * LOG2OF10) - (float)IntegerPartOfX;
+    SDValue IntPartFP = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntPart);
+    SDValue Frac = DAG.getNode(ISD::FSUB, dl, MVT::f32, Scaled, IntPartFP);
+
+    //   IntegerPartOfX <<= 23;
+    SDValue ExpBits = DAG.getNode(ISD::SHL, dl, MVT::i32, IntPart,
+                                  DAG.getConstant(23, TLI.getPointerTy()));
+
+    // Pick the coefficient table matching the requested precision.
+    const unsigned *Coeffs;
+    unsigned NumCoeffs;
+    if (LimitFloatPrecision <= 6) {
+      Coeffs = Poly6;
+      NumCoeffs = sizeof(Poly6) / sizeof(Poly6[0]);
+    } else if (LimitFloatPrecision <= 12) {
+      Coeffs = Poly12;
+      NumCoeffs = sizeof(Poly12) / sizeof(Poly12[0]);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      Coeffs = Poly18;
+      NumCoeffs = sizeof(Poly18) / sizeof(Poly18[0]);
+    }
+
+    // Horner evaluation: start with x * c0, then alternately add the next
+    // coefficient and multiply by x, finishing on an add.
+    SDValue Poly = DAG.getNode(ISD::FMUL, dl, MVT::f32, Frac,
+                               getF32Constant(DAG, Coeffs[0]));
+    for (unsigned i = 1; i != NumCoeffs; ++i) {
+      Poly = DAG.getNode(ISD::FADD, dl, MVT::f32, Poly,
+                         getF32Constant(DAG, Coeffs[i]));
+      if (i + 1 != NumCoeffs)
+        Poly = DAG.getNode(ISD::FMUL, dl, MVT::f32, Poly, Frac);
+    }
+
+    // Add the shifted integer part to the polynomial's bits in the integer
+    // domain, then view the sum as a float again.
+    SDValue PolyBits = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Poly);
+    SDValue TwoToFractionalPartOfX =
+      DAG.getNode(ISD::ADD, dl, MVT::i32, PolyBits, ExpBits);
+
+    result = DAG.getNode(ISD::BIT_CONVERT, dl,
+                         MVT::f32, TwoToFractionalPartOfX);
+  } else {
+    // No special expansion.
+    SDValue Base = getValue(Val);
+    SDValue Power = getValue(I.getOperand(2));
+    result = DAG.getNode(ISD::FPOW, dl, Base.getValueType(), Base, Power);
+  }
+
+  setValue(&I, result);
+}
+
+/// visitIntrinsicCall - Lower the call I to the intrinsic with ID Intrinsic.
+/// If it should instead be emitted as a call to a named external function
+/// (e.g. setjmp/longjmp), return that name; otherwise handle it, return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
+  DebugLoc dl = getCurDebugLoc();
+  switch (Intrinsic) {
+  default:
+    // By default, turn this into a target intrinsic node.
+    visitTargetIntrinsic(I, Intrinsic);
+    return 0;
+  case Intrinsic::vastart:  visitVAStart(I); return 0;
+  case Intrinsic::vaend:    visitVAEnd(I); return 0;
+  case Intrinsic::vacopy:   visitVACopy(I); return 0;
+  case Intrinsic::returnaddress:
+    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::frameaddress:
+    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::setjmp:
+    return "_setjmp"+!TLI.usesUnderscoreSetJmp(); // +1 drops the '_'.
+    break; // Not reached.
+  case Intrinsic::longjmp:
+    return "_longjmp"+!TLI.usesUnderscoreLongJmp(); // +1 drops the '_'.
+    break; // Not reached.
+  case Intrinsic::memcpy: {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    SDValue Op3 = getValue(I.getOperand(3));
+    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+                              I.getOperand(1), 0, I.getOperand(2), 0));
+    return 0;
+  }
+  case Intrinsic::memset: {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    SDValue Op3 = getValue(I.getOperand(3));
+    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
+                              I.getOperand(1), 0));
+    return 0;
+  }
+  case Intrinsic::memmove: {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    SDValue Op3 = getValue(I.getOperand(3));
+    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+    // If the source and destination are known to not be aliases, we can
+    // lower memmove as memcpy.
+    uint64_t Size = -1ULL; // Kept when the length is not a constant.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+      Size = C->getZExtValue();
+    if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+        AliasAnalysis::NoAlias) {
+      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
+                                I.getOperand(1), 0, I.getOperand(2), 0));
+      return 0;
+    }
+
+    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
+                               I.getOperand(1), 0, I.getOperand(2), 0));
+    return 0;
+  }
+  case Intrinsic::dbg_stoppoint: 
+  case Intrinsic::dbg_region_start:
+  case Intrinsic::dbg_region_end:
+  case Intrinsic::dbg_func_start:
+    // FIXME - Remove these instructions once the dust settles.
+    return 0;
+  case Intrinsic::dbg_declare: {
+    if (OptLevel != CodeGenOpt::None) 
+      // FIXME: Variable debug info is not supported here.
+      return 0;
+    DwarfWriter *DW = DAG.getDwarfWriter();
+    if (!DW)
+      return 0;
+    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None))
+      return 0;
+
+    MDNode *Variable = DI.getVariable();
+    Value *Address = DI.getAddress();
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+      Address = BCI->getOperand(0); // Look through one bitcast.
+    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+    // Don't handle byval struct arguments or VLAs, for example.
+    if (!AI)
+      return 0;
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI == FuncInfo.StaticAllocaMap.end()) 
+      return 0; // Not in the static alloca map, e.g. VLAs.
+    int FI = SI->second;
+
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+    if (MMI) {
+      MetadataContext &TheMetadata = 
+        DI.getParent()->getContext().getMetadata();
+      unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+      MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
+      MMI->setVariableDbgInfo(Variable, FI, Dbg);
+    }
+    return 0;
+  }
+  case Intrinsic::eh_exception: {
+    // Insert the EXCEPTIONADDR instruction.
+    assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[1];
+    Ops[0] = DAG.getRoot();
+    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+    setValue(&I, Op);
+    DAG.setRoot(Op.getValue(1));
+    return 0;
+  }
+
+  case Intrinsic::eh_selector: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (CurMBB->isLandingPad())
+      AddCatchInfo(I, MMI, CurMBB);
+    else {
+#ifndef NDEBUG
+      FuncInfo.CatchInfoLost.insert(&I);
+#endif
+      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+      unsigned Reg = TLI.getExceptionSelectorRegister();
+      if (Reg) CurMBB->addLiveIn(Reg);
+    }
+
+    // Insert the EHSELECTION instruction.
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[2];
+    Ops[0] = getValue(I.getOperand(1));
+    Ops[1] = getRoot();
+    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+
+    DAG.setRoot(Op.getValue(1));
+
+    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+    return 0;
+  }
+
+  case Intrinsic::eh_typeid_for: {
+    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+
+    if (MMI) {
+      // Find the type id for the given typeinfo.
+      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+
+      unsigned TypeID = MMI->getTypeIDFor(GV);
+      setValue(&I, DAG.getConstant(TypeID, MVT::i32));
+    } else {
+      // Return something different to eh_selector.
+      setValue(&I, DAG.getConstant(1, MVT::i32));
+    }
+
+    return 0;
+  }
+
+  case Intrinsic::eh_return_i32:
+  case Intrinsic::eh_return_i64:
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+      MMI->setCallsEHReturn(true);
+      DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+                              MVT::Other,
+                              getControlRoot(),
+                              getValue(I.getOperand(1)),
+                              getValue(I.getOperand(2))));
+    } else {
+      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
+    }
+
+    return 0;
+  case Intrinsic::eh_unwind_init:
+    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
+      MMI->setCallsUnwindInit(true);
+    }
+
+    return 0;
+
+  case Intrinsic::eh_dwarf_cfa: {
+    EVT VT = getValue(I.getOperand(1)).getValueType(); // NOTE: VT is unused.
+    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
+                                        TLI.getPointerTy());
+
+    SDValue Offset = DAG.getNode(ISD::ADD, dl,
+                                 TLI.getPointerTy(),
+                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+                                             TLI.getPointerTy()),
+                                 CfaArg);
+    setValue(&I, DAG.getNode(ISD::ADD, dl,
+                             TLI.getPointerTy(),
+                             DAG.getNode(ISD::FRAMEADDR, dl,
+                                         TLI.getPointerTy(),
+                                         DAG.getConstant(0,
+                                                         TLI.getPointerTy())),
+                             Offset));
+    return 0;
+  }
+  case Intrinsic::convertff:
+  case Intrinsic::convertfsi:
+  case Intrinsic::convertfui:
+  case Intrinsic::convertsif:
+  case Intrinsic::convertuif:
+  case Intrinsic::convertss:
+  case Intrinsic::convertsu:
+  case Intrinsic::convertus:
+  case Intrinsic::convertuu: {
+    ISD::CvtCode Code = ISD::CVT_INVALID;
+    switch (Intrinsic) {
+    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
+    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
+    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
+    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
+    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
+    }
+    EVT DestVT = TLI.getValueType(I.getType());
+    Value* Op1 = I.getOperand(1);
+    setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+                                DAG.getValueType(DestVT),
+                                DAG.getValueType(getValue(Op1).getValueType()),
+                                getValue(I.getOperand(2)),
+                                getValue(I.getOperand(3)),
+                                Code));
+    return 0;
+  }
+
+  case Intrinsic::sqrt:
+    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::powi:
+    setValue(&I, DAG.getNode(ISD::FPOWI, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1)),
+                             getValue(I.getOperand(2))));
+    return 0;
+  case Intrinsic::sin:
+    setValue(&I, DAG.getNode(ISD::FSIN, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cos:
+    setValue(&I, DAG.getNode(ISD::FCOS, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::log:
+    visitLog(I);
+    return 0;
+  case Intrinsic::log2:
+    visitLog2(I);
+    return 0;
+  case Intrinsic::log10:
+    visitLog10(I);
+    return 0;
+  case Intrinsic::exp:
+    visitExp(I);
+    return 0;
+  case Intrinsic::exp2:
+    visitExp2(I);
+    return 0;
+  case Intrinsic::pow:
+    visitPow(I);
+    return 0;
+  case Intrinsic::pcmarker: {
+    SDValue Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::readcyclecounter: {
+    SDValue Op = getRoot();
+    SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+                              DAG.getVTList(MVT::i64, MVT::Other),
+                              &Op, 1);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+  case Intrinsic::bswap:
+    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+                             getValue(I.getOperand(1)).getValueType(),
+                             getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::cttz: {
+    SDValue Arg = getValue(I.getOperand(1));
+    EVT Ty = Arg.getValueType();
+    SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::ctlz: {
+    SDValue Arg = getValue(I.getOperand(1));
+    EVT Ty = Arg.getValueType();
+    SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::ctpop: {
+    SDValue Arg = getValue(I.getOperand(1));
+    EVT Ty = Arg.getValueType();
+    SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
+    setValue(&I, result);
+    return 0;
+  }
+  case Intrinsic::stacksave: {
+    SDValue Op = getRoot();
+    SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl,
+              DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+  case Intrinsic::stackrestore: {
+    SDValue Tmp = getValue(I.getOperand(1));
+    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::stackprotector: {
+    // Emit code into the DAG to store the stack guard onto the stack.
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    EVT PtrTy = TLI.getPointerTy();
+
+    SDValue Src = getValue(I.getOperand(1));   // The guard's value.
+    AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+
+    int FI = FuncInfo.StaticAllocaMap[Slot];
+    MFI->setStackProtectorIndex(FI);
+
+    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+    // Store the stack protector onto the stack.
+    SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+                                  PseudoSourceValue::getFixedStack(FI),
+                                  0, true);
+    setValue(&I, Result);
+    DAG.setRoot(Result);
+    return 0;
+  }
+  case Intrinsic::objectsize: {
+    // If we don't know by now, we're never going to know.
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+
+    assert(CI && "Non-constant type in __builtin_object_size?");
+
+    SDValue Arg = getValue(I.getOperand(0)); // NOTE(review): op 0 is the callee?
+    EVT Ty = Arg.getValueType();
+
+    if (CI->getZExtValue() < 2)
+      setValue(&I, DAG.getConstant(-1ULL, Ty));
+    else
+      setValue(&I, DAG.getConstant(0, Ty));
+    return 0;
+  }
+  case Intrinsic::var_annotation:
+    // Discard annotate attributes.
+    return 0;
+
+  case Intrinsic::init_trampoline: {
+    const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+
+    SDValue Ops[6];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getOperand(1));
+    Ops[2] = getValue(I.getOperand(2));
+    Ops[3] = getValue(I.getOperand(3));
+    Ops[4] = DAG.getSrcValue(I.getOperand(1));
+    Ops[5] = DAG.getSrcValue(F);
+
+    SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl,
+                              DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+                              Ops, 6);
+
+    setValue(&I, Tmp);
+    DAG.setRoot(Tmp.getValue(1));
+    return 0;
+  }
+
+  case Intrinsic::gcroot:
+    if (GFI) {
+      Value *Alloca = I.getOperand(1);
+      Constant *TypeMap = cast<Constant>(I.getOperand(2));
+
+      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+      GFI->addStackRoot(FI->getIndex(), TypeMap);
+    }
+    return 0;
+
+  case Intrinsic::gcread:
+  case Intrinsic::gcwrite:
+    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+    return 0;
+
+  case Intrinsic::flt_rounds: {
+    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+    return 0;
+  }
+
+  case Intrinsic::trap: {
+    DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+    return 0;
+  }
+
+  case Intrinsic::uadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::UADDO);
+  case Intrinsic::sadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::SADDO);
+  case Intrinsic::usub_with_overflow:
+    return implVisitAluOverflow(I, ISD::USUBO);
+  case Intrinsic::ssub_with_overflow:
+    return implVisitAluOverflow(I, ISD::SSUBO);
+  case Intrinsic::umul_with_overflow:
+    return implVisitAluOverflow(I, ISD::UMULO);
+  case Intrinsic::smul_with_overflow:
+    return implVisitAluOverflow(I, ISD::SMULO);
+
+  case Intrinsic::prefetch: {
+    SDValue Ops[4];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getOperand(1));
+    Ops[2] = getValue(I.getOperand(2));
+    Ops[3] = getValue(I.getOperand(3));
+    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+    return 0;
+  }
+
+  case Intrinsic::memory_barrier: {
+    SDValue Ops[6];
+    Ops[0] = getRoot();
+    for (int x = 1; x < 6; ++x)
+      Ops[x] = getValue(I.getOperand(x));
+
+    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+    return 0;
+  }
+  case Intrinsic::atomic_cmp_swap: {
+    SDValue Root = getRoot();
+    SDValue L =
+      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+                    getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+                    Root,
+                    getValue(I.getOperand(1)),
+                    getValue(I.getOperand(2)),
+                    getValue(I.getOperand(3)),
+                    I.getOperand(1));
+    setValue(&I, L);
+    DAG.setRoot(L.getValue(1));
+    return 0;
+  }
+  case Intrinsic::atomic_load_add:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+  case Intrinsic::atomic_load_sub:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+  case Intrinsic::atomic_load_or:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+  case Intrinsic::atomic_load_xor:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+  case Intrinsic::atomic_load_and:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+  case Intrinsic::atomic_load_nand:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+  case Intrinsic::atomic_load_max:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+  case Intrinsic::atomic_load_min:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+  case Intrinsic::atomic_load_umin:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+  case Intrinsic::atomic_load_umax:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+  case Intrinsic::atomic_swap:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+  case Intrinsic::invariant_start:
+  case Intrinsic::lifetime_start:
+    // Discard region information; the call's value becomes undef.
+    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+    return 0;
+  case Intrinsic::invariant_end:
+  case Intrinsic::lifetime_end:
+    // Discard region information.
+    return 0;
+  }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+/// For target-dependent requirements, a target should override
+/// TargetLowering::IsEligibleForTailCallOptimization.
+/// CalleeRetAttr is compared against the caller's return attributes.
+static bool
+isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
+                     const TargetLowering &TLI) {
+  const BasicBlock *ExitBB = I->getParent();
+  const TerminatorInst *Term = ExitBB->getTerminator();
+  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+  const Function *F = ExitBB->getParent();
+
+  // The block must end in a return statement or an unreachable.
+  if (!Ret && !isa<UnreachableInst>(Term)) return false;
+
+  // If I will have a chain, walk backwards from the instruction just before
+  // the terminator and make sure nothing else with a chain precedes I.
+  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+      !I->isSafeToSpeculativelyExecute())
+    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+         --BBI) {
+      if (&*BBI == I)
+        break; // Reached I without meeting another chained instruction.
+      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+          !BBI->isSafeToSpeculativelyExecute())
+        return false;
+    }
+
+  // If the block ends with a void return or unreachable, it doesn't matter
+  // what the call's return type is.
+  if (!Ret || Ret->getNumOperands() == 0) return true;
+
+  // If the return value is undef, it doesn't matter what the call's
+  // return type is.
+  if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+    return false;
+
+  // Otherwise, make sure the unmodified return value of I is the return value.
+  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+       U = dyn_cast<Instruction>(U->getOperand(0))) {
+    if (!U) // Hit a non-instruction before reaching I.
+      return false;
+    if (!U->hasOneUse())
+      return false;
+    if (U == I)
+      break;
+    // Check for a truly no-op truncate.
+    if (isa<TruncInst>(U) &&
+        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+      continue;
+    // Check for a truly no-op bitcast (same type, or pointer to pointer).
+    if (isa<BitCastInst>(U) &&
+        (U->getOperand(0)->getType() == U->getType() ||
+         (isa<PointerType>(U->getOperand(0)->getType()) &&
+          isa<PointerType>(U->getType()))))
+      continue;
+    // Otherwise it's not a true no-op.
+    return false;
+  }
+
+  return true;
+}
+/// LowerCallTo - Lower the call or invoke CS of Callee via TLI.LowerCallTo.
+void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
+                                      bool isTailCall,
+                                      MachineBasicBlock *LandingPad) {
+  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  const Type *RetTy = FTy->getReturnType();
+  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
+  unsigned BeginLabel = 0, EndLabel = 0;
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Args.reserve(CS.arg_size());
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<EVT, 4> OutVTs;
+  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+  SmallVector<uint64_t, 4> Offsets;
+  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), 
+    OutVTs, OutsFlags, TLI, &Offsets);
+  
+
+  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), 
+                        FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+
+  SDValue DemoteStackSlot;
+
+  if (!CanLowerReturn) { // Demote: pass a hidden sret stack slot first.
+    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+                      FTy->getReturnType());
+    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
+                      FTy->getReturnType());
+    MachineFunction &MF = DAG.getMachineFunction();
+    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+    Entry.Node = DemoteStackSlot;
+    Entry.Ty = StackSlotPtrType;
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Entry.isInReg = false;
+    Entry.isSRet = true;
+    Entry.isNest = false;
+    Entry.isByVal = false;
+    Entry.Alignment = Align;
+    Args.push_back(Entry);
+    RetTy = Type::getVoidTy(FTy->getContext()); // Call now returns void.
+  }
+
+  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    SDValue ArgNode = getValue(*i);
+    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+
+    unsigned attrInd = i - CS.arg_begin() + 1; // Argument attrs are 1-based.
+    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
+    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
+    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
+    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
+    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+    Entry.Alignment = CS.getParamAlignment(attrInd);
+    Args.push_back(Entry);
+  }
+
+  if (LandingPad && MMI) {
+    // Insert a label before the invoke call to mark the try range.  This can be
+    // used to detect deletion of the invoke via the MachineModuleInfo.
+    BeginLabel = MMI->NextLabelID();
+
+    // Both PendingLoads and PendingExports must be flushed here;
+    // this call might not return.
+    (void)getRoot();
+    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+                             getControlRoot(), BeginLabel));
+  }
+
+  // Check if target-independent constraints permit a tail call here.
+  // Target-dependent constraints are checked within TLI.LowerCallTo.
+  if (isTailCall &&
+      !isInTailCallPosition(CS.getInstruction(),
+                            CS.getAttributes().getRetAttributes(),
+                            TLI))
+    isTailCall = false;
+
+  std::pair<SDValue,SDValue> Result =
+    TLI.LowerCallTo(getRoot(), RetTy,
+                    CS.paramHasAttr(0, Attribute::SExt),
+                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
+                    CS.getCallingConv(),
+                    isTailCall,
+                    !CS.getInstruction()->use_empty(),
+                    Callee, Args, DAG, getCurDebugLoc());
+  assert((isTailCall || Result.second.getNode()) &&
+         "Non-null chain expected with non-tail call!");
+  assert((Result.second.getNode() || !Result.first.getNode()) &&
+         "Null value expected with tail call!");
+  if (Result.first.getNode())
+    setValue(CS.getInstruction(), Result.first);
+  else if (!CanLowerReturn && Result.second.getNode()) {
+    // The instruction result is the result of loading from the
+    // hidden sret parameter.
+    SmallVector<EVT, 1> PVTs;
+    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+    ComputeValueVTs(TLI, PtrRetTy, PVTs);
+    assert(PVTs.size() == 1 && "Pointers should fit in one register");
+    EVT PtrVT = PVTs[0];
+    unsigned NumValues = OutVTs.size();
+    SmallVector<SDValue, 4> Values(NumValues);
+    SmallVector<SDValue, 4> Chains(NumValues);
+
+    for (unsigned i = 0; i < NumValues; ++i) { // One load per returned part.
+      SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+        DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot,
+        DAG.getConstant(Offsets[i], PtrVT)),
+        NULL, Offsets[i], false, 1);
+      Values[i] = L;
+      Chains[i] = L.getValue(1);
+    }
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                MVT::Other, &Chains[0], NumValues);
+    PendingLoads.push_back(Chain);
+
+    setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES,
+             getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues),
+             &Values[0], NumValues));
+  }
+  // As a special case, a null chain means that a tail call has
+  // been emitted and the DAG root is already updated.
+  if (Result.second.getNode())
+    DAG.setRoot(Result.second);
+  else
+    HasTailCall = true;
+
+  if (LandingPad && MMI) {
+    // Insert a label at the end of the invoke call to mark the try range.  This
+    // can be used to detect deletion of the invoke via the MachineModuleInfo.
+    EndLabel = MMI->NextLabelID();
+    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
+                             getRoot(), EndLabel));
+
+    // Inform MachineModuleInfo of range.
+    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
+  }
+}
+
+/// visitCall - Lower I as an intrinsic, a libm node, inline asm or a call.
+void SelectionDAGBuilder::visitCall(CallInst &I) {
+  const char *RenameFn = 0;
+  if (Function *F = I.getCalledFunction()) {
+    if (F->isDeclaration()) {
+      const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
+      if (II) { // Try the target's own intrinsics first.
+        if (unsigned IID = II->getIntrinsicID(F)) {
+          RenameFn = visitIntrinsicCall(I, IID);
+          if (!RenameFn) // Null name: nothing left to emit as a call.
+            return;
+        }
+      }
+      if (unsigned IID = F->getIntrinsicID()) {
+        RenameFn = visitIntrinsicCall(I, IID);
+        if (!RenameFn) // Null name: nothing left to emit as a call.
+          return;
+      }
+    }
+
+    // Check for well-known libc/libm calls.  If the function is internal, it
+    // can't be a library call.
+    if (!F->hasLocalLinkage() && F->hasName()) {
+      StringRef Name = F->getName();
+      if (Name == "copysign" || Name == "copysignf") {
+        if (I.getNumOperands() == 3 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.getType() == I.getOperand(2)->getType()) {
+          SDValue LHS = getValue(I.getOperand(1));
+          SDValue RHS = getValue(I.getOperand(2));
+          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+                                   LHS.getValueType(), LHS, RHS));
+          return;
+        }
+      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
+            I.getOperand(1)->getType()->isFloatingPoint() &&
+            I.getType() == I.getOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getOperand(1));
+          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      }
+    }
+  } else if (isa<InlineAsm>(I.getOperand(0))) {
+    visitInlineAsm(&I);
+    return;
+  }
+
+  SDValue Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getOperand(0)); // Operand 0 is the called value.
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call. More detailed
+  // checking is done within LowerCallTo, after more information
+  // about the call is known.
+  bool isTailCall = PerformTailCallOpt && I.isTailCall();
+
+  LowerCallTo(&I, Callee, isTailCall);
+}
+
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy out of the
+/// registers held by this object and return the result as a MERGE_VALUES node
+/// with one result per entry in ValueVTs.  Chain/Flag are used as the input
+/// and updated for the output.  If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
+                                      SDValue &Chain,
+                                      SDValue *Flag) const {
+  // Assemble the legal parts into the final values.
+  SmallVector<SDValue, 4> Values(ValueVTs.size());
+  SmallVector<SDValue, 8> Parts;
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    // Copy the legal parts from the registers.
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+    EVT RegisterVT = RegVTs[Value];
+
+    Parts.resize(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      SDValue P;
+      if (Flag == 0)
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+      else {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+        *Flag = P.getValue(2);  // Thread the flag to the next copy.
+      }
+      Chain = P.getValue(1);  // Thread the chain to the next copy.
+
+      // If the source register was virtual and if we know something about it,
+      // add an assert node.
+      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+          RegisterVT.isInteger() && !RegisterVT.isVector()) {
+        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+        FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+        if (FLI.LiveOutRegInfo.size() > SlotNo) {
+          FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
+
+          unsigned RegSize = RegisterVT.getSizeInBits();
+          unsigned NumSignBits = LOI.NumSignBits;
+          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+          // FIXME: We capture more information than the dag can represent.  For
+          // now, just use the tightest assertzext/assertsext possible.
+          bool isSExt = true;
+          EVT FromVT(MVT::Other);
+          if (NumSignBits == RegSize)
+            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
+          else if (NumZeroBits >= RegSize-1)
+            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
+          else if (NumSignBits > RegSize-8)
+            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
+          else if (NumZeroBits >= RegSize-8)
+            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
+          else if (NumSignBits > RegSize-16)
+            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
+          else if (NumZeroBits >= RegSize-16)
+            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+          else if (NumSignBits > RegSize-32)
+            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
+          else if (NumZeroBits >= RegSize-32)
+            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+          if (FromVT != MVT::Other) {
+            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+                            RegisterVT, P, DAG.getValueType(FromVT));
+
+          }
+        }
+      }
+
+      Parts[i] = P;
+    }
+
+    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+                                     NumRegs, RegisterVT, ValueVT);
+    Part += NumRegs;  // Step to the first register of the next value.
+    Parts.clear();
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl,
+                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                     &Values[0], ValueVTs.size());
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object.  This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                                 SDValue &Chain, SDValue *Flag) const {
+  // Get the list of the value's legal parts.
+  unsigned NumRegs = Regs.size();
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
+    EVT RegisterVT = RegVTs[Value];
+
+    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+                   &Parts[Part], NumParts, RegisterVT);
+    Part += NumParts;
+  }
+
+  // Copy the parts into the registers; every copy uses the incoming Chain.
+  SmallVector<SDValue, 8> Chains(NumRegs);
+  for (unsigned i = 0; i != NumRegs; ++i) {
+    SDValue Part;
+    if (Flag == 0)
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+    else {
+      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+      *Flag = Part.getValue(1);
+    }
+    Chains[i] = Part.getValue(0);
+  }
+
+  if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is the CopyToReg nodes and the user are considered
+    // a single scheduling unit. If we create a TokenFactor and return it as
+    // chain, then the TokenFactor is both a predecessor (operand) of the
+    // user as well as a successor (the TF operands are flagged to the user).
+    // c1, f1 = CopyToReg
+    // c2, f2 = CopyToReg
+    // c3     = TokenFactor c1, c2
+    // ...
+    //        = op c3, ..., f2
+    Chain = Chains[NumRegs-1];  // NOTE(review): assumes NumRegs != 0.
+  else
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list.  This first adds a flag word (Code | #regs << 3, plus a
+/// tied-operand marker if HasMatching), then one register node per register.
+void RegsForValue::AddInlineAsmOperands(unsigned Code,
+                                        bool HasMatching,unsigned MatchingIdx,
+                                        SelectionDAG &DAG,
+                                        std::vector<SDValue> &Ops) const {
+  EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
+  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
+  unsigned Flag = Code | (Regs.size() << 3);
+  if (HasMatching)
+    Flag |= 0x80000000 | (MatchingIdx << 16);
+  Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
+  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+    EVT RegisterVT = RegVTs[Value];
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      assert(Reg < Regs.size() && "Mismatch in # registers expected");
+      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+    }
+  }
+}
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register.  Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI,
+                      const TargetRegisterInfo *TRI) {
+  EVT FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+       E = TRI->regclass_end(); RCI != E; ++RCI) {
+    EVT ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register class,
+        // choose the one with the largest VT specified.  For example, on
+        // PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+
+    if (ThisVT == MVT::Other) continue;  // No legal VT wider than FoundVT.
+
+    // NOTE: This isn't ideal.  In particular, this might allocate the
+    // frame pointer in functions that need it (due to them not being taken
+    // out of allocation, because a variable sized allocation hasn't been seen
+    // yet).  This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class.  Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+
+namespace llvm {
+/// SDISelAsmOperandInfo - This contains information for each constraint that
+/// we are lowering.
+class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+    public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs,
+                         std::set<unsigned> &InputRegs,
+                         const TargetRegisterInfo &TRI) const {
+    if (isOutReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+    }
+    if (isInReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+    }
+  }
+
+  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+  /// corresponds to.  If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  EVT getCallOperandValEVT(LLVMContext &Context, 
+                           const TargetLowering &TLI,
+                           const TargetData *TD) const {
+    if (CallOperandVal == 0) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy();
+
+    const llvm::Type *OpTy = CallOperandVal->getType();
+
+    // If this is an indirect operand, the operand is a pointer to the
+    // accessed type.
+    if (isIndirect)
+      OpTy = cast<PointerType>(OpTy)->getElementType();
+
+    // If OpTy is not a single value, it may be a struct/union that we
+    // can tile with integers.
+    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+      switch (BitSize) {
+      default: break;
+      case 1:
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+        OpTy = IntegerType::get(Context, BitSize);
+        break;
+      }
+    }
+
+    return TLI.getValueType(OpTy, true);
+  }
+
+private:
+  /// MarkRegAndAliases - Mark the specified register and all aliases in the
+  /// specified set.
+  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+                                const TargetRegisterInfo &TRI) {
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+    Regs.insert(Reg);
+    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+      for (; *Aliases; ++Aliases)
+        Regs.insert(*Aliases);
+  }
+};
+} // end llvm namespace.
+
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand.  We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process.  However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation.  This produces generally horrible, but correct, code.
+///
+///   OpInfo describes the operand.
+///   InputRegs and OutputRegs are the sets of already allocated physregs.
+///
+void SelectionDAGBuilder::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  LLVMContext &Context = FuncInfo.Fn->getContext();
+
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If there is an input constraint that matches this, we need to reserve
+    // the input register so no other inputs allocate to it.
+    isInReg = OpInfo.hasMatchingInput();
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is a FP input in an integer register (or vice versa) insert a bit
+    // cast of the input value.  More generally, handle any case where the input
+    // value disagrees with the register class we plan to stick this in.
+    if (OpInfo.Type == InlineAsm::isInput &&
+        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+      // Try to convert to the first EVT that the reg class contains.  If the
+      // types are identical size, use a bitcast to convert (e.g. two differing
+      // vector types).
+      EVT RegVT = *PhysReg.second->vt_begin();
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is a FP value and we want it in integer registers, do a
+        // bitcast to the corresponding integer type.  This turns an f64 value
+        // into i64, which can be passed with two i32 values on a 32-bit
+        // machine.
+        RegVT = EVT::getIntegerVT(Context, 
+                                  OpInfo.ConstraintVT.getSizeInBits());
+        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      }
+    }
+
+    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+  }
+
+  EVT RegVT;
+  EVT ValueVT = OpInfo.ConstraintVT;
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (unsigned AssignedReg = PhysReg.first) {
+    const TargetRegisterClass *RC = PhysReg.second;
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *RC->vt_begin();
+
+    // Get the actual register value type.  This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type.  We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(AssignedReg);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = RC->begin();
+      for (; *I != AssignedReg; ++I)  // NOTE(review): *I read before end check
+        assert(I != RC->end() && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != RC->end() && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  if (const TargetRegisterClass *RC = PhysReg.second) {
+    RegVT = *RC->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
+
+    // Create the appropriate number of virtual registers.
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    for (; NumRegs; --NumRegs)
+      Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+    return;
+  }
+  
+  // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class.  Allocate NumRegs consecutive, available,
+  // registers from the class.
+  std::vector<unsigned> RegClassRegs
+    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                            OpInfo.ConstraintVT);
+
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+    if (!RC) {        // Couldn't allocate this register.
+      // Reset NumAllocated to make sure we return consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Mark all of the allocated registers used.
+      for (unsigned i = RegStart; i != RegEnd; ++i)
+        Regs.push_back(RegClassRegs[i]);
+
+      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+                                         OpInfo.ConstraintVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+      return;
+    }
+  }
+
+  // Otherwise, we couldn't allocate enough registers; AssignedRegs is not set.
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm being processed
+/// has a constraint code of type C_Memory or has any indirect operand.
+static bool
+hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+                          const TargetLowering &TLI) {
+  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+    InlineAsm::ConstraintInfo &CI = CInfos[i];
+    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+      if (CType == TargetLowering::C_Memory)
+        return true;
+    }
+    
+    // Indirect operand accesses access memory.
+    if (CI.isIndirect)
+      return true;
+  }
+
+  return false;
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
+  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  /// ConstraintOperands - Information about all of the constraints.
+  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+
+  std::set<unsigned> OutputRegs, InputRegs;
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  std::vector<InlineAsm::ConstraintInfo>
+    ConstraintInfos = IA->ParseConstraints();
+
+  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
+  
+  SDValue Chain, Flag;
+  
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    EVT OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+        break;
+      }
+
+      // The return value of the call is this value.  As such, there is no
+      // corresponding argument.
+      assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+             "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpVT = TLI.getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpVT = TLI.getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in asm's.
+    if (OpInfo.CallOperandVal) {
+      // Strip bitcasts, if any.  This mostly comes up for functions.
+      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
+
+      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+      } else {
+        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      }
+
+      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+    }
+
+    OpInfo.ConstraintVT = OpVT;
+  }
+
+  // Second pass over the constraints: compute which constraint option to use
+  // and assign registers to constraints that want a specific physreg.
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          llvm_report_error("Unsupported asm: input constraint"
+                            " with a matching output constraint of incompatible"
+                            " type!");
+        }
+        Input.ConstraintVT = OpInfo.ConstraintVT;
+      }
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what we
+    // need to to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert(OpInfo.Type == InlineAsm::isInput &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value.  If we don't have
+      // an indirect input, put it in the constpool if we can, otherwise spill
+      // it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, getCurDebugLoc(),
+                             OpInfo.CallOperand, StackSlot, NULL, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+  ConstraintInfos.clear();
+
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+
+
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register.  Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        llvm_report_error("Couldn't allocate output reg for"
+                          " constraint '" + OpInfo.ConstraintCode + "'!");
+      }
+
+      // If this is an indirect operand, store through the pointer after the
+      // asm.
+      if (OpInfo.isIndirect) {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      } else {
+        // This is the result value of the call.
+        assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+               "Bad inline asm!");
+        // Concatenate this output onto the outputs list.
+        RetValRegs.append(OpInfo.AssignedRegs);
+      }
+
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+                                               6 /* EARLYCLOBBER REGDEF */ :
+                                               2 /* REGDEF */ ,
+                                               false,
+                                               0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDValue InOperandVal = OpInfo.CallOperand;
+
+      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = OpInfo.getMatchedOperand();
+
+        // Scan until we find the definition we already emitted of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = 2;  // The first operand.
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned OpFlag =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
+                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
+                  (OpFlag & 7) == 4 /*MEM*/) &&
+                 "Skipped past definitions?");
+          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+        }
+
+        unsigned OpFlag =
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+        if ((OpFlag & 7) == 2 /*REGDEF*/
+            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
+          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          if (OpInfo.isIndirect) {
+            llvm_report_error("Don't know how to handle tied indirect "
+                              "register inputs yet!");
+          }
+          RegsForValue MatchedRegs;
+          MatchedRegs.TLI = &TLI;
+          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+               i != e; ++i)
+            MatchedRegs.Regs.
+              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Copy the input into MatchedRegs and add it as a matching REGUSE.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                    Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
+                                           true, OpInfo.getMatchedOperand(),
+                                           DAG, AsmNodeOperands);
+          break;
+        } else {
+          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
+          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
+                 "Unexpected number of operands");
+          // Add information to the INLINEASM node to know about this input.
+          // See InlineAsm.h isUseOperandTiedToDef.
+          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
+          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                          TLI.getPointerTy()));
+          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+          break;
+        }
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        assert(!OpInfo.isIndirect &&
+               "Don't know how to handle indirect other inputs yet!");
+
+        std::vector<SDValue> Ops;
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+                                         hasMemory, Ops, DAG);
+        if (Ops.empty()) {
+          llvm_report_error("Invalid operand for inline asm"
+                            " constraint '" + OpInfo.ConstraintCode + "'!");
+        }
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+        break;
+      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = 4/*MEM*/ | (1<<3);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      if (OpInfo.AssignedRegs.Regs.empty()) {
+        llvm_report_error("Couldn't allocate input reg for"
+                          " constraint '"+ OpInfo.ConstraintCode +"'!");
+      }
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                        Chain, &Flag);
+
+      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
+                                                 false, 0, DAG,AsmNodeOperands);
+      break;
+    }
+    }
+  }
+
+  // Finish up input operands.
+  AsmNodeOperands[0] = Chain;
+  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+                      DAG.getVTList(MVT::Other, MVT::Flag),
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             Chain, &Flag);
+
+    // FIXME: Why don't we do this for inline asms with MRVs?
+    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+      EVT ResultType = TLI.getValueType(CS.getType());
+
+      // If any of the results of the inline asm is a vector, it may have the
+      // wrong width/num elts.  This can happen for register classes that can
+      // contain multiple different value types.  The preg or vreg allocated may
+      // not have the same VT as was expected.  Convert it to the right type
+      // with bit_convert.
+      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+                          ResultType, Val);
+
+      } else if (ResultType != Val.getValueType() &&
+                 ResultType.isInteger() && Val.getValueType().isInteger()) {
+        // If a result value was tied to an input value, the computed result may
+        // have a wider width than the expected result.  Extract the relevant
+        // portion.
+        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+      }
+
+      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+    }
+
+    setValue(CS.getInstruction(), Val);
+    // Don't need to use this as a chain in this case.
+    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+      return;
+  }
+
+  std::vector<std::pair<SDValue, Value*> > StoresToEmit;
+
+  // Process indirect outputs, first output all of the flagged copies out of
+  // physregs.
+  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+    Value *Ptr = IndirectStoresToEmit[i].second;
+    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+                                             Chain, &Flag);
+    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+
+  }
+
+  // Emit the non-flagged stores from the physregs.
+  SmallVector<SDValue, 8> OutChains;
+  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
+    OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
+                                    StoresToEmit[i].first,
+                                    getValue(StoresToEmit[i].second),
+                                    StoresToEmit[i].second, 0));
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                        &OutChains[0], OutChains.size());
+  DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitVAStart(CallInst &I) {
+  // Emit a VASTART node on the current root and make it the new root.
+  Value *Arg = I.getOperand(1);
+  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(),
+                          getValue(Arg), DAG.getSrcValue(Arg)));
+}
+
+void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
+  Value *Src = I.getOperand(0);
+  SDValue Arg = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+                             getRoot(), getValue(Src), DAG.getSrcValue(Src));
+  setValue(&I, Arg);             // The node itself is the value of I.
+  DAG.setRoot(Arg.getValue(1));  // Its result 1 becomes the new root.
+}
+
+void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
+  // Emit a VAEND node on the current root and make it the new root.
+  Value *Arg = I.getOperand(1);
+  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(),
+                          getValue(Arg), DAG.getSrcValue(Arg)));
+}
+
+void SelectionDAGBuilder::visitVACopy(CallInst &I) {
+  // Operands 1 and 2 are each passed twice: as a DAG value and as a SrcValue.
+  Value *Op1 = I.getOperand(1);
+  Value *Op2 = I.getOperand(2);
+  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(),
+                          getValue(Op1), getValue(Op2),
+                          DAG.getSrcValue(Op1), DAG.getSrcValue(Op2)));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation: it builds the Outs/Ins lists, calls LowerCall and returns
+/// (result, chain), both null after a tail call.  FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+                            bool RetSExt, bool RetZExt, bool isVarArg,
+                            bool isInreg, unsigned NumFixedArgs,
+                            CallingConv::ID CallConv, bool isTailCall,
+                            bool isReturnValueUsed,
+                            SDValue Callee,
+                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
+  assert((!isTailCall || PerformTailCallOpt) &&
+         "isTailCall set when tail-call optimizations are disabled!");
+
+  // Handle all of the outgoing arguments.
+  SmallVector<ISD::OutputArg, 32> Outs;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      SDValue Op = SDValue(Args[i].Node.getNode(),
+                           Args[i].Node.getResNo() + Value);
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        getTargetData()->getABITypeAlignment(ArgTy);
+
+      if (Args[i].isZExt)
+        Flags.setZExt();
+      if (Args[i].isSExt)
+        Flags.setSExt();
+      if (Args[i].isInReg)
+        Flags.setInReg();
+      if (Args[i].isSRet)
+        Flags.setSRet();
+      if (Args[i].isByVal) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, alignment should come from FE.  BE will guess if this
+        // info is not there but there are cases it cannot get right.
+        if (Args[i].Alignment)
+          FrameAlign = Args[i].Alignment;
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (Args[i].isNest)
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+      SmallVector<SDValue, 4> Parts(NumParts);
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+      if (Args[i].isSExt)
+        ExtendKind = ISD::SIGN_EXTEND;
+      else if (Args[i].isZExt)
+        ExtendKind = ISD::ZERO_EXTEND;
+
+      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
+
+      for (unsigned j = 0; j != NumParts; ++j) {
+        // if it isn't first piece, alignment must be 1
+        ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+        if (NumParts > 1 && j == 0)
+          MyFlags.Flags.setSplit();
+        else if (j != 0)
+          MyFlags.Flags.setOrigAlign(1);
+
+        Outs.push_back(MyFlags);
+      }
+    }
+  }
+
+  // Handle the incoming return values from the call.
+  SmallVector<ISD::InputArg, 32> Ins;
+  SmallVector<EVT, 4> RetTys;
+  ComputeValueVTs(*this, RetTy, RetTys);
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      ISD::InputArg MyFlags;
+      MyFlags.VT = RegisterVT;
+      MyFlags.Used = isReturnValueUsed;
+      if (RetSExt)
+        MyFlags.Flags.setSExt();
+      if (RetZExt)
+        MyFlags.Flags.setZExt();
+      if (isInreg)
+        MyFlags.Flags.setInReg();
+      Ins.push_back(MyFlags);
+    }
+  }
+
+  // Check if target-dependent constraints permit a tail call here.
+  // Target-independent constraints should be checked by the caller.
+  if (isTailCall &&
+      !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG))
+    isTailCall = false;
+
+  SmallVector<SDValue, 4> InVals;
+  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+                    Outs, Ins, dl, DAG, InVals);
+
+  // Verify that the target's LowerCall behaved as expected.
+  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+         "LowerCall didn't return a valid chain!");
+  assert((!isTailCall || InVals.empty()) &&
+         "LowerCall emitted a return value for a tail call!");
+  assert((isTailCall || InVals.size() == Ins.size()) &&
+         "LowerCall didn't emit the correct number of values!");
+
+  // For a tail call, the return value is merely live-out and there aren't
+  // any nodes in the DAG representing it. Return a special value to
+  // indicate that a tail call has been emitted and no more Instructions
+  // should be processed in the current block.
+  if (isTailCall) {
+    DAG.setRoot(Chain);
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  // InVals is empty for tail calls, so check individual values only here.
+  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+          assert(InVals[i].getNode() &&
+                 "LowerCall emitted a null value!");
+          assert(Ins[i].VT == InVals[i].getValueType() &&
+                 "LowerCall emitted a value with the wrong type!");
+        });
+
+  // Collect the legal value parts into potentially illegal values
+  // that correspond to the original function's return values.
+  ISD::NodeType AssertOp = ISD::DELETED_NODE;
+  if (RetSExt)
+    AssertOp = ISD::AssertSext;
+  else if (RetZExt)
+    AssertOp = ISD::AssertZext;
+  SmallVector<SDValue, 4> ReturnValues;
+  unsigned CurReg = 0;
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+    SDValue ReturnValue =
+      getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT,
+                       AssertOp);
+    ReturnValues.push_back(ReturnValue);
+    CurReg += NumRegs;
+  }
+
+  // For a function returning void, there is no return value. We can't create
+  // such a node, so we just return a null return value in that case. In
+  // that case, nothing will actually look at the value.
+  if (ReturnValues.empty())
+    return std::make_pair(SDValue(), Chain);
+
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+                            DAG.getVTList(&RetTys[0], RetTys.size()),
+                            &ReturnValues[0], ReturnValues.size());
+
+  return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) {
+  // Lower result 0 of N; a null lowering appends nothing to Results.
+  SDValue Lowered = LowerOperation(SDValue(N, 0), DAG);
+  if (Lowered.getNode()) Results.push_back(Lowered);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  llvm_unreachable("LowerOperation not implemented for this target!");
+  return SDValue(); // Follows llvm_unreachable; presumably never executed.
+}
+
+
+void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
+  SDValue SrcVal = getValue(V);
+  assert((SrcVal.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(SrcVal.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+  // Chain the copy from the entry node and queue the chain as a pending export.
+  SDValue CopyChain = DAG.getEntryNode();
+  RegsForValue DestRegs(V->getContext(), TLI, Reg, V->getType());
+  DestRegs.getCopyToRegs(SrcVal, DAG, getCurDebugLoc(), CopyChain, 0);
+  PendingExports.push_back(CopyChain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
+  // If this is the entry block, emit arguments.
+  Function &F = *LLVMBB->getParent();
+  SelectionDAG &DAG = SDB->DAG;
+  SDValue OldRoot = DAG.getRoot();
+  DebugLoc dl = SDB->getCurDebugLoc();
+  const TargetData *TD = TLI.getTargetData();
+  SmallVector<ISD::InputArg, 16> Ins;
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<EVT, 4> OutVTs;
+  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), 
+                OutVTs, OutsFlags, TLI);
+  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+  FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), 
+    OutVTs, OutsFlags, DAG);
+  if (!FLI.CanLowerReturn) {
+    // Put in an sret pointer parameter before all the other parameters.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+    // NOTE: Assuming that a pointer will never break down to more than one VT
+    // or one register.
+    ISD::ArgFlagsTy Flags;
+    Flags.setSRet();
+    EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+    ISD::InputArg RetArg(Flags, RegisterVT, true);
+    Ins.push_back(RetArg);
+  }
+
+  // Set up the incoming argument description vector.
+  unsigned Idx = 1; // Index handed to paramHasAttr; advances with each argument.
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I, ++Idx) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    bool isArgValueUsed = !I->use_empty();
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        TD->getABITypeAlignment(ArgTy);
+
+      if (F.paramHasAttr(Idx, Attribute::ZExt))
+        Flags.setZExt();
+      if (F.paramHasAttr(Idx, Attribute::SExt))
+        Flags.setSExt();
+      if (F.paramHasAttr(Idx, Attribute::InReg))
+        Flags.setInReg();
+      if (F.paramHasAttr(Idx, Attribute::StructRet))
+        Flags.setSRet();
+      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(I->getType());
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
+        // For ByVal, alignment should be passed from FE.  BE will guess if
+        // this info is not there but there are cases it cannot get right.
+        if (F.getParamAlignment(Idx))
+          FrameAlign = F.getParamAlignment(Idx);
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (F.paramHasAttr(Idx, Attribute::Nest))
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+        if (NumRegs > 1 && i == 0)
+          MyFlags.Flags.setSplit();
+        // if it isn't first piece, alignment must be 1
+        else if (i > 0)
+          MyFlags.Flags.setOrigAlign(1);
+        Ins.push_back(MyFlags);
+      }
+    }
+  }
+
+  // Call the target to set up the argument values.
+  SmallVector<SDValue, 8> InVals;
+  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+                                             F.isVarArg(), Ins,
+                                             dl, DAG, InVals);
+
+  // Verify that the target's LowerFormalArguments behaved as expected.
+  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+         "LowerFormalArguments didn't return a valid chain!");
+  assert(InVals.size() == Ins.size() &&
+         "LowerFormalArguments didn't emit the correct number of values!");
+  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+          assert(InVals[i].getNode() &&
+                 "LowerFormalArguments emitted a null value!");
+          assert(Ins[i].VT == InVals[i].getValueType() &&
+                 "LowerFormalArguments emitted a value with the wrong type!");
+        });
+
+  // Update the DAG with the new chain value resulting from argument lowering.
+  DAG.setRoot(NewRoot);
+
+  // Set up the argument values.
+  unsigned i = 0;
+  Idx = 1;
+  if (!FLI.CanLowerReturn) {
+    // Create a virtual register for the sret pointer, and put in a copy
+    // from the sret argument into it.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+    EVT VT = ValueVTs[0];
+    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
+                                        VT, AssertOp);
+
+    MachineFunction& MF = SDB->DAG.getMachineFunction();
+    MachineRegisterInfo& RegInfo = MF.getRegInfo();
+    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+    FLI.DemoteRegister = SRetReg;
+    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue);
+    DAG.setRoot(NewRoot);
+    
+    // i indexes lowered arguments.  Bump it past the hidden sret argument.
+    // Idx indexes LLVM arguments.  Don't touch it.
+    ++i;
+  }
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+      ++I, ++Idx) {
+    SmallVector<SDValue, 4> ArgValues;
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+    for (unsigned Value = 0; Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+      if (!I->use_empty()) {
+        ISD::NodeType AssertOp = ISD::DELETED_NODE;
+        if (F.paramHasAttr(Idx, Attribute::SExt))
+          AssertOp = ISD::AssertSext;
+        else if (F.paramHasAttr(Idx, Attribute::ZExt))
+          AssertOp = ISD::AssertZext;
+
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+                                             PartVT, VT, AssertOp));
+      }
+      i += NumParts;
+    }
+    if (!I->use_empty()) {
+      SDB->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
+                                          SDB->getCurDebugLoc()));
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BB's will reference it as.
+      SDB->CopyToExportRegsIfNeeded(I);
+    }
+  }
+  assert(i == InVals.size() && "Argument register count mismatch!");
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
+}
+
+/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
+/// ensure constants are generated when needed.  Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input.  We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB.  As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = SDB->ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo->CreateRegForValue(C);
+          SDB->CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        Reg = FuncInfo->ValueMap[PHIOp];
+        if (Reg == 0) {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo->CreateRegForValue(PHIOp);
+          SDB->CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+  SDB->ConstantsOut.clear();
+}
+
+/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
+/// supports legal types, and it emits MachineInstrs directly instead of
+/// creating SelectionDAG nodes.
+///
+bool
+SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
+                                                      FastISel *F) {
+  TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+  unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+    PHINode *PN;
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::iterator I = SuccBB->begin();
+         (PN = dyn_cast<PHINode>(I)); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types. Two interesting things to note here. First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+      // use CreateRegForValue to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
+        else {
+          SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      unsigned Reg = F->getRegForValue(PHIOp);
+      if (Reg == 0) {
+        SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+    }
+  }
+
+  return true;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 0000000000000..244f9b5019e1c
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,487 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+/// Also, targets can overload any lowering method.
+///
+class SelectionDAGBuilder {
+  MachineBasicBlock *CurMBB;
+
+  /// CurDebugLoc - current file + line number.  Changes as we build the DAG.
+  DebugLoc CurDebugLoc;
+
+  DenseMap<const Value*, SDValue> NodeMap;
+
+  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
+  /// them up and then emit token factor nodes when possible.  This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  SmallVector<SDValue, 8> PendingLoads;
+
+  /// PendingExports - CopyToReg nodes that copy values to virtual registers
+  /// for export to other blocks need to be emitted before any terminator
+  /// instruction, but they have no other ordering requirements. We bunch them
+  /// up and then emit a single tokenfactor for them just before terminator
+  /// instructions.
+  SmallVector<SDValue, 8> PendingExports;
+
+  /// Case - A struct to record the Value for a switch case, and the
+  /// case's target basic block.
+  struct Case {
+    Constant* Low;
+    Constant* High;
+    MachineBasicBlock* BB;
+
+    Case() : Low(0), High(0), BB(0) { }
+    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+      Low(low), High(high), BB(bb) { }
+    APInt size() const {
+      const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+      const APInt &rLow  = cast<ConstantInt>(Low)->getValue();
+      return (rHigh - rLow + 1ULL);
+    }
+  };
+
+  struct CaseBits {
+    uint64_t Mask;
+    MachineBasicBlock* BB;
+    unsigned Bits;
+
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+      Mask(mask), BB(bb), Bits(bits) { }
+  };
+
+  typedef std::vector<Case>           CaseVector;
+  typedef std::vector<CaseBits>       CaseBitsVector;
+  typedef CaseVector::iterator        CaseItr;
+  typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+  /// of conditional branches.
+  struct CaseRec {
+    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
+    CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+    /// CaseBB - The MBB in which to emit the compare and branch
+    MachineBasicBlock *CaseBB;
+    /// LT, GE - If nonzero, we know the current case value must be less-than or
+    /// greater-than-or-equal-to these Constants.
+    Constant *LT;
+    Constant *GE;
+    /// Range - A pair of iterators representing the range of case values to be
+    /// processed at this point in the binary search tree.
+    CaseRange Range;
+  };
+
+  typedef std::vector<CaseRec> CaseRecVector;
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator () (const Case& C1, const Case& C2) {
+      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  struct CaseBitsCmp {
+    bool operator () (const CaseBits& C1, const CaseBits& C2) {
+      return C1.Bits > C2.Bits;
+    }
+  };
+
+  size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+
+  /// CaseBlock - This structure is used to communicate between
+  /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+  /// blocks needed by multi-case switch statements.
+  struct CaseBlock {
+    CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle,
+              MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+              MachineBasicBlock *me)
+      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+        TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+    // CC - the condition code to use for the case block's setcc node
+    ISD::CondCode CC;
+    // CmpLHS/CmpMHS/CmpRHS - The LHS/MHS/RHS of the comparison to emit.
+    // Emit by default LHS op RHS. MHS is used for range comparisons:
+    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+    Value *CmpLHS, *CmpMHS, *CmpRHS;
+    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+    MachineBasicBlock *TrueBB, *FalseBB;
+    // ThisBB - the block into which to emit the code for the setcc and branches
+    MachineBasicBlock *ThisBB;
+  };
+  struct JumpTable {
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+  
+    /// Reg - the virtual register containing the index of the jump table entry
+    /// to jump to.
+    unsigned Reg;
+    /// JTI - the JumpTableIndex for this jump table in the function.
+    unsigned JTI;
+    /// MBB - the MBB into which to emit the code for the indirect jump.
+    MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB.  This is used when updating PHI nodes in successors.
+    MachineBasicBlock *Default;
+  };
+  struct JumpTableHeader {
+    JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+                    bool E = false):
+      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+    APInt First;
+    APInt Last;
+    Value *SValue;
+    MachineBasicBlock *HeaderBB;
+    bool Emitted;
+  };
+  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+  struct BitTestCase {
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+      Mask(M), ThisBB(T), TargetBB(Tr) { }
+    uint64_t Mask;
+    MachineBasicBlock* ThisBB;
+    MachineBasicBlock* TargetBB;
+  };
+
+  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+  struct BitTestBlock {
+    BitTestBlock(APInt F, APInt R, Value* SV,
+                 unsigned Rg, bool E,
+                 MachineBasicBlock* P, MachineBasicBlock* D,
+                 const BitTestInfo& C):
+      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+      Parent(P), Default(D), Cases(C) { }
+    APInt First;
+    APInt Range;
+    Value  *SValue;
+    unsigned Reg;
+    bool Emitted;
+    MachineBasicBlock *Parent;
+    MachineBasicBlock *Default;
+    BitTestInfo Cases;
+  };
+
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+  AliasAnalysis *AA;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  /// PHINodesToUpdate - A list of phi instructions whose operand list will
+  /// be updated after processing the current basic block.
+  std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
+
+  /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to
+  /// scheduler custom lowering), track the change here.
+  DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<Constant*, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  /// 
+  CodeGenOpt::Level OptLevel;
+  
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  /// HasTailCall - This is set to true if a call in the current
+  /// block has been translated as a tail call. In this case,
+  /// no subsequent DAG nodes should be created.
+  ///
+  bool HasTailCall;
+
+  LLVMContext *Context;
+
+  SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,
+                      FunctionLoweringInfo &funcinfo,
+                      CodeGenOpt::Level ol)
+    : CurDebugLoc(DebugLoc::getUnknownLoc()), 
+      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+      HasTailCall(false),
+      Context(dag.getContext()) {
+  }
+
+  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+  /// state and prepare this SelectionDAGBuilder object to be used
+  /// for a new block. This doesn't clear out information about
+  /// additional blocks that are needed to complete switch lowering
+  /// or PHI node updating; that information is cleared out as it is
+  /// consumed.
+  void clear();
+
+  /// getRoot - Return the current virtual root of the Selection DAG,
+  /// flushing any PendingLoad items. This must be done before emitting
+  /// a store or any other node that may need to be ordered after any
+  /// prior load instructions.
+  ///
+  SDValue getRoot();
+
+  /// getControlRoot - Similar to getRoot, but instead of flushing all the
+  /// PendingLoad items, flush all the PendingExports items. It is necessary
+  /// to do this before emitting a terminator instruction.
+  ///
+  SDValue getControlRoot();
+
+  DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+  void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; }
+
+  void CopyValueToVirtualRegister(Value *V, unsigned Reg);
+
+  void visit(Instruction &I);
+
+  void visit(unsigned Opcode, User &I);
+
+  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }
+
+  SDValue getValue(const Value *V);
+
+  void setValue(const Value *V, SDValue NewN) {
+    SDValue &N = NodeMap[V];
+    assert(N.getNode() == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+  
+  void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                            std::set<unsigned> &OutputRegs, 
+                            std::set<unsigned> &InputRegs);
+
+  void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            unsigned Opc);
+  void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB,
+                                    MachineBasicBlock *FBB,
+                                    MachineBasicBlock *CurBB);
+  bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+  bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB);
+  void CopyToExportRegsIfNeeded(Value *V);
+  void ExportFromCurrentBlock(Value *V);
+  void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall,
+                   MachineBasicBlock *LandingPad = NULL);
+
+private:
+  // Terminator instructions.
+  void visitRet(ReturnInst &I);
+  void visitBr(BranchInst &I);
+  void visitSwitch(SwitchInst &I);
+  void visitIndirectBr(IndirectBrInst &I);
+  void visitUnreachable(UnreachableInst &I) { /* noop */ }
+
+  // Helpers for visitSwitch
+  bool handleSmallSwitchRange(CaseRec& CR,
+                              CaseRecVector& WorkList,
+                              Value* SV,
+                              MachineBasicBlock* Default);
+  bool handleJTSwitchCase(CaseRec& CR,
+                          CaseRecVector& WorkList,
+                          Value* SV,
+                          MachineBasicBlock* Default);
+  bool handleBTSplitSwitchCase(CaseRec& CR,
+                               CaseRecVector& WorkList,
+                               Value* SV,
+                               MachineBasicBlock* Default);
+  bool handleBitTestsSwitchCase(CaseRec& CR,
+                                CaseRecVector& WorkList,
+                                Value* SV,
+                                MachineBasicBlock* Default);  
+public:
+  void visitSwitchCase(CaseBlock &CB);
+  void visitBitTestHeader(BitTestBlock &B);
+  void visitBitTestCase(MachineBasicBlock* NextMBB,
+                        unsigned Reg,
+                        BitTestCase &B);
+  void visitJumpTable(JumpTable &JT);
+  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH);
+  
+private:
+  // These all get lowered before this pass.
+  void visitInvoke(InvokeInst &I);
+  void visitUnwind(UnwindInst &I);
+
+  void visitBinary(User &I, unsigned OpCode);
+  void visitShift(User &I, unsigned Opcode);
+  void visitAdd(User &I)  { visitBinary(I, ISD::ADD); }
+  void visitFAdd(User &I) { visitBinary(I, ISD::FADD); }
+  void visitSub(User &I)  { visitBinary(I, ISD::SUB); }
+  void visitFSub(User &I);
+  void visitMul(User &I)  { visitBinary(I, ISD::MUL); }
+  void visitFMul(User &I) { visitBinary(I, ISD::FMUL); }
+  void visitURem(User &I) { visitBinary(I, ISD::UREM); }
+  void visitSRem(User &I) { visitBinary(I, ISD::SREM); }
+  void visitFRem(User &I) { visitBinary(I, ISD::FREM); }
+  void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); }
+  void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); }
+  void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); }
+  void visitAnd (User &I) { visitBinary(I, ISD::AND); }
+  void visitOr  (User &I) { visitBinary(I, ISD::OR); }
+  void visitXor (User &I) { visitBinary(I, ISD::XOR); }
+  void visitShl (User &I) { visitShift(I, ISD::SHL); }
+  void visitLShr(User &I) { visitShift(I, ISD::SRL); }
+  void visitAShr(User &I) { visitShift(I, ISD::SRA); }
+  void visitICmp(User &I);
+  void visitFCmp(User &I);
+  // Visit the conversion instructions
+  void visitTrunc(User &I);
+  void visitZExt(User &I);
+  void visitSExt(User &I);
+  void visitFPTrunc(User &I);
+  void visitFPExt(User &I);
+  void visitFPToUI(User &I);
+  void visitFPToSI(User &I);
+  void visitUIToFP(User &I);
+  void visitSIToFP(User &I);
+  void visitPtrToInt(User &I);
+  void visitIntToPtr(User &I);
+  void visitBitCast(User &I);
+
+  void visitExtractElement(User &I);
+  void visitInsertElement(User &I);
+  void visitShuffleVector(User &I);
+
+  void visitExtractValue(ExtractValueInst &I);
+  void visitInsertValue(InsertValueInst &I);
+
+  void visitGetElementPtr(User &I);
+  void visitSelect(User &I);
+
+  void visitAlloca(AllocaInst &I);
+  void visitLoad(LoadInst &I);
+  void visitStore(StoreInst &I);
+  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
+  void visitCall(CallInst &I);
+  void visitInlineAsm(CallSite CS);
+  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);
+
+  void visitPow(CallInst &I);
+  void visitExp2(CallInst &I);
+  void visitExp(CallInst &I);
+  void visitLog(CallInst &I);
+  void visitLog2(CallInst &I);
+  void visitLog10(CallInst &I);
+
+  void visitVAStart(CallInst &I);
+  void visitVAArg(VAArgInst &I);
+  void visitVAEnd(CallInst &I);
+  void visitVACopy(CallInst &I);
+
+  void visitUserOp1(Instruction &I) {
+    llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+  }
+  void visitUserOp2(Instruction &I) {
+    llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+  }
+  
+  const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index ab5f21e4337c4..c39437f986470 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -13,7 +13,8 @@
 
 #define DEBUG_TYPE "isel"
 #include "ScheduleDAGSDNodes.h"
-#include "SelectionDAGBuild.h"
+#include "SelectionDAGBuilder.h"
+#include "FunctionLoweringInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
@@ -279,14 +280,14 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) :
   MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
   FuncInfo(new FunctionLoweringInfo(TLI)),
   CurDAG(new SelectionDAG(TLI, *FuncInfo)),
-  SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)),
+  SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)),
   GFI(),
   OptLevel(OL),
   DAGSize(0)
 {}
 
 SelectionDAGISel::~SelectionDAGISel() {
-  delete SDL;
+  delete SDB;
   delete CurDAG;
   delete FuncInfo;
 }
@@ -331,8 +332,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
   CurDAG->init(*MF, MMI, DW);
-  FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel);
-  SDL->init(GFI, *AA);
+  FuncInfo->set(Fn, *MF, EnableFastISel);
+  SDB->init(GFI, *AA);
 
   for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
     if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator()))
@@ -361,29 +362,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   return true;
 }
 
-static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB,
-                          MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
-  for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I)
-    if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
-      // Apply the catch info to DestBB.
-      AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]);
-#ifndef NDEBUG
-      if (!FLI.MBBMap[SrcBB]->isLandingPad())
-        FLI.CatchInfoFound.insert(EHSel);
-#endif
-    }
-}
-
 void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
                                         BasicBlock::iterator Begin,
-                                        BasicBlock::iterator End) {
-  SDL->setCurrentBasicBlock(BB);
+                                        BasicBlock::iterator End,
+                                        bool &HadTailCall) {
+  SDB->setCurrentBasicBlock(BB);
   MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata();
   unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
 
   // Lower all of the non-terminator instructions. If a call is emitted
   // as a tail call, cease emitting nodes for this block.
-  for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) {
+  for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
     if (MDDbgKind) {
       // Update DebugLoc if debug information is attached with this
       // instruction.
@@ -391,37 +380,38 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
         if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
           DILocation DILoc(Dbg);
           DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
-          SDL->setCurDebugLoc(Loc);
+          SDB->setCurDebugLoc(Loc);
           if (MF->getDefaultDebugLoc().isUnknown())
             MF->setDefaultDebugLoc(Loc);
         }
     }
     if (!isa<TerminatorInst>(I))
-      SDL->visit(*I);
+      SDB->visit(*I);
   }
 
-  if (!SDL->HasTailCall) {
+  if (!SDB->HasTailCall) {
     // Ensure that all instructions which are used outside of their defining
     // blocks are available as virtual registers.  Invoke is handled elsewhere.
     for (BasicBlock::iterator I = Begin; I != End; ++I)
       if (!isa<PHINode>(I) && !isa<InvokeInst>(I))
-        SDL->CopyToExportRegsIfNeeded(I);
+        SDB->CopyToExportRegsIfNeeded(I);
 
     // Handle PHI nodes in successor blocks.
     if (End == LLVMBB->end()) {
       HandlePHINodesInSuccessorBlocks(LLVMBB);
 
       // Lower the terminator after the copies are emitted.
-      SDL->visit(*LLVMBB->getTerminator());
+      SDB->visit(*LLVMBB->getTerminator());
     }
   }
 
   // Make sure the root of the DAG is up-to-date.
-  CurDAG->setRoot(SDL->getControlRoot());
+  CurDAG->setRoot(SDB->getControlRoot());
 
   // Final step, emit the lowered DAG as machine code.
   CodeGenAndEmitDAG();
-  SDL->clear();
+  HadTailCall = SDB->HasTailCall;
+  SDB->clear();
 }
 
 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -629,9 +619,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   // inserted into.
   if (TimePassesIsEnabled) {
     NamedRegionTimer T("Instruction Creation", GroupName);
-    BB = Scheduler->EmitSchedule(&SDL->EdgeMapping);
+    BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
   } else {
-    BB = Scheduler->EmitSchedule(&SDL->EdgeMapping);
+    BB = Scheduler->EmitSchedule(&SDB->EdgeMapping);
   }
 
   // Free the scheduler state.
@@ -701,7 +691,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
       unsigned LabelID = MMI->addLandingPad(BB);
 
       const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL);
-      BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID);
+      BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID);
 
       // Mark exception register as live in.
       unsigned Reg = TLI.getExceptionAddressRegister();
@@ -732,7 +722,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
 
         if (I == E)
           // No catch info found - try to extract some from the successor.
-          copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
+          CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo);
       }
     }
 
@@ -741,9 +731,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
       // Emit code for any incoming arguments. This must happen before
       // beginning FastISel on the entry block.
       if (LLVMBB == &Fn.getEntryBlock()) {
-        CurDAG->setRoot(SDL->getControlRoot());
+        CurDAG->setRoot(SDB->getControlRoot());
         CodeGenAndEmitDAG();
-        SDL->clear();
+        SDB->clear();
       }
       FastIS->startNewBlock(BB);
       // Do FastISel on as many instructions as possible.
@@ -796,8 +786,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
               R = FuncInfo->CreateRegForValue(BI);
           }
 
-          SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
-          SelectBasicBlock(LLVMBB, BI, next(BI));
+          SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
+
+          bool HadTailCall = false;
+          SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall);
+
+          // If the call was emitted as a tail call, we're done with the block.
+          if (HadTailCall) {
+            BI = End;
+            break;
+          }
+
           // If the instruction was codegen'd with multiple blocks,
           // inform the FastISel object where to resume inserting.
           FastIS->setCurrentBlock(BB);
@@ -826,8 +825,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
     if (BI != End) {
       // If FastISel is run and it has known DebugLoc then use it.
       if (FastIS && !FastIS->getCurDebugLoc().isUnknown())
-        SDL->setCurDebugLoc(FastIS->getCurDebugLoc());
-      SelectBasicBlock(LLVMBB, BI, End);
+        SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
+      bool HadTailCall;
+      SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
     }
 
     FinishBasicBlock();
@@ -843,150 +843,150 @@ SelectionDAGISel::FinishBasicBlock() {
   DEBUG(BB->dump());
 
   DEBUG(errs() << "Total amount of phi nodes to update: "
-               << SDL->PHINodesToUpdate.size() << "\n");
-  DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i)
+               << SDB->PHINodesToUpdate.size() << "\n");
+  DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
           errs() << "Node " << i << " : ("
-                 << SDL->PHINodesToUpdate[i].first
-                 << ", " << SDL->PHINodesToUpdate[i].second << ")\n");
+                 << SDB->PHINodesToUpdate[i].first
+                 << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
 
   // Next, now that we know what the last MBB the LLVM BB expanded is, update
   // PHI nodes in successors.
-  if (SDL->SwitchCases.empty() &&
-      SDL->JTCases.empty() &&
-      SDL->BitTestCases.empty()) {
-    for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
-      MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+  if (SDB->SwitchCases.empty() &&
+      SDB->JTCases.empty() &&
+      SDB->BitTestCases.empty()) {
+    for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
       assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
              "This is not a machine PHI node that we are updating!");
-      PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+      PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
                                                 false));
       PHI->addOperand(MachineOperand::CreateMBB(BB));
     }
-    SDL->PHINodesToUpdate.clear();
+    SDB->PHINodesToUpdate.clear();
     return;
   }
 
-  for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) {
+  for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
     // Lower header first, if it wasn't already lowered
-    if (!SDL->BitTestCases[i].Emitted) {
+    if (!SDB->BitTestCases[i].Emitted) {
       // Set the current basic block to the mbb we wish to insert the code into
-      BB = SDL->BitTestCases[i].Parent;
-      SDL->setCurrentBasicBlock(BB);
+      BB = SDB->BitTestCases[i].Parent;
+      SDB->setCurrentBasicBlock(BB);
       // Emit the code
-      SDL->visitBitTestHeader(SDL->BitTestCases[i]);
-      CurDAG->setRoot(SDL->getRoot());
+      SDB->visitBitTestHeader(SDB->BitTestCases[i]);
+      CurDAG->setRoot(SDB->getRoot());
       CodeGenAndEmitDAG();
-      SDL->clear();
+      SDB->clear();
     }
 
-    for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) {
+    for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
       // Set the current basic block to the mbb we wish to insert the code into
-      BB = SDL->BitTestCases[i].Cases[j].ThisBB;
-      SDL->setCurrentBasicBlock(BB);
+      BB = SDB->BitTestCases[i].Cases[j].ThisBB;
+      SDB->setCurrentBasicBlock(BB);
       // Emit the code
       if (j+1 != ej)
-        SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB,
-                              SDL->BitTestCases[i].Reg,
-                              SDL->BitTestCases[i].Cases[j]);
+        SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j]);
       else
-        SDL->visitBitTestCase(SDL->BitTestCases[i].Default,
-                              SDL->BitTestCases[i].Reg,
-                              SDL->BitTestCases[i].Cases[j]);
+        SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j]);
 
 
-      CurDAG->setRoot(SDL->getRoot());
+      CurDAG->setRoot(SDB->getRoot());
       CodeGenAndEmitDAG();
-      SDL->clear();
+      SDB->clear();
     }
 
     // Update PHI Nodes
-    for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
-      MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+    for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
       MachineBasicBlock *PHIBB = PHI->getParent();
       assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
              "This is not a machine PHI node that we are updating!");
       // This is "default" BB. We have two jumps to it. From "header" BB and
       // from last "case" BB.
-      if (PHIBB == SDL->BitTestCases[i].Default) {
-        PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+      if (PHIBB == SDB->BitTestCases[i].Default) {
+        PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
                                                   false));
-        PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent));
-        PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+        PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
                                                   false));
-        PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases.
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
                                                   back().ThisBB));
       }
       // One of "cases" BB.
-      for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size();
+      for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
            j != ej; ++j) {
-        MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB;
+        MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
         if (cBB->succ_end() !=
             std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
-          PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second,
+          PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
                                                     false));
           PHI->addOperand(MachineOperand::CreateMBB(cBB));
         }
       }
     }
   }
-  SDL->BitTestCases.clear();
+  SDB->BitTestCases.clear();
 
   // If the JumpTable record is filled in, then we need to emit a jump table.
   // Updating the PHI nodes is tricky in this case, since we need to determine
   // whether the PHI is a successor of the range check MBB or the jump table MBB
-  for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) {
+  for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
     // Lower header first, if it wasn't already lowered
-    if (!SDL->JTCases[i].first.Emitted) {
+    if (!SDB->JTCases[i].first.Emitted) {
       // Set the current basic block to the mbb we wish to insert the code into
-      BB = SDL->JTCases[i].first.HeaderBB;
-      SDL->setCurrentBasicBlock(BB);
+      BB = SDB->JTCases[i].first.HeaderBB;
+      SDB->setCurrentBasicBlock(BB);
       // Emit the code
-      SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first);
-      CurDAG->setRoot(SDL->getRoot());
+      SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first);
+      CurDAG->setRoot(SDB->getRoot());
       CodeGenAndEmitDAG();
-      SDL->clear();
+      SDB->clear();
     }
 
     // Set the current basic block to the mbb we wish to insert the code into
-    BB = SDL->JTCases[i].second.MBB;
-    SDL->setCurrentBasicBlock(BB);
+    BB = SDB->JTCases[i].second.MBB;
+    SDB->setCurrentBasicBlock(BB);
     // Emit the code
-    SDL->visitJumpTable(SDL->JTCases[i].second);
-    CurDAG->setRoot(SDL->getRoot());
+    SDB->visitJumpTable(SDB->JTCases[i].second);
+    CurDAG->setRoot(SDB->getRoot());
     CodeGenAndEmitDAG();
-    SDL->clear();
+    SDB->clear();
 
     // Update PHI Nodes
-    for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) {
-      MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first;
+    for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) {
+      MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first;
       MachineBasicBlock *PHIBB = PHI->getParent();
       assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
              "This is not a machine PHI node that we are updating!");
       // "default" BB. We can go there only from header BB.
-      if (PHIBB == SDL->JTCases[i].second.Default) {
+      if (PHIBB == SDB->JTCases[i].second.Default) {
         PHI->addOperand
-          (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false));
+          (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
         PHI->addOperand
-          (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB));
+          (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
       }
       // JT BB. Just iterate over successors here
       if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
         PHI->addOperand
-          (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false));
+          (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
         PHI->addOperand(MachineOperand::CreateMBB(BB));
       }
     }
   }
-  SDL->JTCases.clear();
+  SDB->JTCases.clear();
 
   // If the switch block involved a branch to one of the actual successors, we
   // need to update PHI nodes in that block.
-  for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) {
-    MachineInstr *PHI = SDL->PHINodesToUpdate[i].first;
+  for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstr *PHI = SDB->PHINodesToUpdate[i].first;
     assert(PHI->getOpcode() == TargetInstrInfo::PHI &&
            "This is not a machine PHI node that we are updating!");
     if (BB->isSuccessor(PHI->getParent())) {
-      PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second,
+      PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second,
                                                 false));
       PHI->addOperand(MachineOperand::CreateMBB(BB));
     }
@@ -994,36 +994,36 @@ SelectionDAGISel::FinishBasicBlock() {
 
   // If we generated any switch lowering information, build and codegen any
   // additional DAGs necessary.
-  for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) {
+  for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
     // Set the current basic block to the mbb we wish to insert the code into
-    MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB;
-    SDL->setCurrentBasicBlock(BB);
+    MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
+    SDB->setCurrentBasicBlock(BB);
 
     // Emit the code
-    SDL->visitSwitchCase(SDL->SwitchCases[i]);
-    CurDAG->setRoot(SDL->getRoot());
+    SDB->visitSwitchCase(SDB->SwitchCases[i]);
+    CurDAG->setRoot(SDB->getRoot());
     CodeGenAndEmitDAG();
 
     // Handle any PHI nodes in successors of this chunk, as if we were coming
     // from the original BB before switch expansion.  Note that PHI nodes can
     // occur multiple times in PHINodesToUpdate.  We have to be very careful to
     // handle them the right number of times.
-    while ((BB = SDL->SwitchCases[i].TrueBB)) {  // Handle LHS and RHS.
+    while ((BB = SDB->SwitchCases[i].TrueBB)) {  // Handle LHS and RHS.
       // If new BB's are created during scheduling, the edges may have been
       // updated. That is, the edge from ThisBB to BB may have been split and
       // BB's predecessor is now another block.
       DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI =
-        SDL->EdgeMapping.find(BB);
-      if (EI != SDL->EdgeMapping.end())
+        SDB->EdgeMapping.find(BB);
+      if (EI != SDB->EdgeMapping.end())
         ThisBB = EI->second;
       for (MachineBasicBlock::iterator Phi = BB->begin();
            Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
         // This value for this PHI node is recorded in PHINodesToUpdate, get it.
         for (unsigned pn = 0; ; ++pn) {
-          assert(pn != SDL->PHINodesToUpdate.size() &&
+          assert(pn != SDB->PHINodesToUpdate.size() &&
                  "Didn't find PHI entry!");
-          if (SDL->PHINodesToUpdate[pn].first == Phi) {
-            Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn].
+          if (SDB->PHINodesToUpdate[pn].first == Phi) {
+            Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn].
                                                       second, false));
             Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
             break;
@@ -1032,19 +1032,19 @@ SelectionDAGISel::FinishBasicBlock() {
       }
 
       // Don't process RHS if same block as LHS.
-      if (BB == SDL->SwitchCases[i].FalseBB)
-        SDL->SwitchCases[i].FalseBB = 0;
+      if (BB == SDB->SwitchCases[i].FalseBB)
+        SDB->SwitchCases[i].FalseBB = 0;
 
       // If we haven't handled the RHS, do so now.  Otherwise, we're done.
-      SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB;
-      SDL->SwitchCases[i].FalseBB = 0;
+      SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB;
+      SDB->SwitchCases[i].FalseBB = 0;
     }
-    assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0);
-    SDL->clear();
+    assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0);
+    SDB->clear();
   }
-  SDL->SwitchCases.clear();
+  SDB->SwitchCases.clear();
 
-  SDL->PHINodesToUpdate.clear();
+  SDB->PHINodesToUpdate.clear();
 }
 
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index ccc5e3c75c99b..c5adc5000dbac 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -35,6 +35,9 @@ using namespace llvm;
 namespace llvm {
   template<>
   struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+    DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
     static bool hasEdgeDestLabels() {
       return true;
     }
@@ -48,8 +51,8 @@ namespace llvm {
     }
 
     /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
-    /// should actually target another edge source, not a node.  If this method is
-    /// implemented, getEdgeTarget should be implemented.
+    /// should actually target another edge source, not a node.  If this method
+    /// is implemented, getEdgeTarget should be implemented.
     template<typename EdgeIter>
     static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
       return true;
@@ -93,9 +96,16 @@ namespace llvm {
     }
     
 
-    static std::string getNodeLabel(const SDNode *Node,
-                                    const SelectionDAG *Graph,
-                                    bool ShortNames);
+    static std::string getSimpleNodeLabel(const SDNode *Node,
+                                          const SelectionDAG *G) {
+      std::string Result = Node->getOperationName(G);
+      {
+        raw_string_ostream OS(Result);
+        Node->print_details(OS, G);
+      }
+      return Result;
+    }
+    std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
     static std::string getNodeAttributes(const SDNode *N,
                                          const SelectionDAG *Graph) {
 #ifndef NDEBUG
@@ -121,14 +131,8 @@ namespace llvm {
 }
 
 std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
-                                                        const SelectionDAG *G,
-                                                        bool ShortNames) {
-  std::string Result = Node->getOperationName(G);
-  {
-    raw_string_ostream OS(Result);
-    Node->print_details(OS, G);
-  }
-  return Result;
+                                                        const SelectionDAG *G) {
+  return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel (Node, G);
 }
 
 
@@ -269,8 +273,8 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
     for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
       FlaggedNodes.push_back(N);
     while (!FlaggedNodes.empty()) {
-      O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(),
-                                                       DAG, false);
+      O << DOTGraphTraits<SelectionDAG*>
+	     ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
       FlaggedNodes.pop_back();
       if (!FlaggedNodes.empty())
         O << "\n    ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2ca52a48c2a9e..68bc2d6306b2c 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -532,11 +532,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
   InitLibcallNames(LibcallRoutineNames);
   InitCmpLibcallCCs(CmpLibcallCCs);
   InitLibcallCallingConvs(LibcallCallingConvs);
-
-  // Tell Legalize whether the assembler supports DEBUG_LOC.
-  const MCAsmInfo *TASM = TM.getMCAsmInfo();
-  if (!TASM || !TASM->hasDotLocAndDotFile())
-    setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
 }
 
 TargetLowering::~TargetLowering() {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 3909c56bdbb22..58763718f9b5d 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -2371,16 +2371,26 @@ namespace {
   struct DepthMBBCompare {
     typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
     bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
-      if (LHS.first > RHS.first) return true;   // Deeper loops first
-      return LHS.first == RHS.first &&
-        LHS.second->getNumber() < RHS.second->getNumber();
+      // Deeper loops first
+      if (LHS.first != RHS.first)
+        return LHS.first > RHS.first;
+
+      // Prefer blocks that are more connected in the CFG. This takes care of
+      // the most difficult copies first while intervals are short.
+      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+      if (cl != cr)
+        return cl > cr;
+
+      // As a last resort, sort by block number.
+      return LHS.second->getNumber() < RHS.second->getNumber();
     }
   };
 }
 
 void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
                                                std::vector<CopyRec> &TryAgain) {
-  DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
+  DEBUG(errs() << MBB->getName() << ":\n");
 
   std::vector<CopyRec> VirtCopies;
   std::vector<CopyRec> PhysCopies;
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 20c4a28b1f3d3..237d0b5f4658a 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -12,7 +12,6 @@
 #include "Spiller.h"
 #include "VirtRegMap.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -47,16 +46,14 @@ protected:
 
   MachineFunction *mf;
   LiveIntervals *lis;
-  LiveStacks *ls;
   MachineFrameInfo *mfi;
   MachineRegisterInfo *mri;
   const TargetInstrInfo *tii;
   VirtRegMap *vrm;
   
   /// Construct a spiller base. 
-  SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
-              VirtRegMap *vrm) :
-    mf(mf), lis(lis), ls(ls), vrm(vrm)
+  SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+    : mf(mf), lis(lis), vrm(vrm)
   {
     mfi = mf->getFrameInfo();
     mri = &mf->getRegInfo();
@@ -169,9 +166,8 @@ protected:
 class TrivialSpiller : public SpillerBase {
 public:
 
-  TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
-                 VirtRegMap *vrm)
-    : SpillerBase(mf, lis, ls, vrm) {}
+  TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+    : SpillerBase(mf, lis, vrm) {}
 
   std::vector<LiveInterval*> spill(LiveInterval *li,
                                    SmallVectorImpl<LiveInterval*> &spillIs) {
@@ -188,7 +184,7 @@ private:
   const MachineLoopInfo *loopInfo;
   VirtRegMap *vrm;
 public:
-  StandardSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+  StandardSpiller(MachineFunction *mf, LiveIntervals *lis,
                   const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
     : lis(lis), loopInfo(loopInfo), vrm(vrm) {}
 
@@ -203,12 +199,11 @@ public:
 }
 
 llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
-                                   LiveStacks *ls,
                                    const MachineLoopInfo *loopInfo,
                                    VirtRegMap *vrm) {
   switch (spillerOpt) {
-    case trivial: return new TrivialSpiller(mf, lis, ls, vrm); break;
-    case standard: return new StandardSpiller(mf, lis, ls, loopInfo, vrm); break;
+    case trivial: return new TrivialSpiller(mf, lis, vrm); break;
+    case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break;
     default: llvm_unreachable("Unreachable!"); break;
   }
 }
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 7ec8e6d7ffb54..c6bd9857dbaf2 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -41,8 +41,7 @@ namespace llvm {
 
   /// Create and return a spiller object, as specified on the command line.
   Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
-                         LiveStacks *ls, const MachineLoopInfo *loopInfo,
-                         VirtRegMap *vrm);
+                         const MachineLoopInfo *loopInfo, VirtRegMap *vrm);
 }
 
 #endif
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 0000000000000..9c0b596c33f5a
--- /dev/null
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,249 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+                  cl::desc("Maximum instructions to consider tail duplicating"),
+                  cl::init(2), cl::Hidden);
+
+namespace {
+  /// TailDuplicatePass - Perform tail duplication.
+  class TailDuplicatePass : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    MachineModuleInfo *MMI;
+
+  public:
+    static char ID;
+    explicit TailDuplicatePass() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "Tail Duplication"; }
+
+  private:
+    bool TailDuplicateBlocks(MachineFunction &MF);
+    bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+    void RemoveDeadBlock(MachineBasicBlock *MBB);
+  };
+
+  char TailDuplicatePass::ID = 0;
+}
+
+FunctionPass *llvm::createTailDuplicatePass() {
+  return new TailDuplicatePass();
+}
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+  TII = MF.getTarget().getInstrInfo();
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+  bool MadeChange = false;
+  bool MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = false;
+    MadeChangeThisIteration |= TailDuplicateBlocks(MF);
+    MadeChange |= MadeChangeThisIteration;
+  }
+
+  return MadeChange;
+}
+
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
+  bool MadeChange = false;
+
+  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+    MachineBasicBlock *MBB = I++;
+
+    // Only duplicate blocks that end with unconditional branches.
+    if (MBB->canFallThrough())
+      continue;
+
+    MadeChange |= TailDuplicate(MBB, MF);
+
+    // If it is dead, remove it.
+    if (MBB->pred_empty()) {
+      NumInstrDups -= MBB->size();
+      RemoveDeadBlock(MBB);
+      MadeChange = true;
+      ++NumDeadBlocks;
+    }
+  }
+  return MadeChange;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+                                        MachineFunction &MF) {
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB->isSuccessor(TailBB))
+    return false;
+
+  // Set the limit on the number of instructions to duplicate, with a default
+  // of one less than the tail-merge threshold. When optimizing for size,
+  // duplicate only one, because one branch instruction can be eliminated to
+  // compensate for the duplication.
+  unsigned MaxDuplicateCount;
+  if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch())
+    // If the target has hardware branch prediction that can handle indirect
+    // branches, duplicating them can often make them predictable when there
+    // are common paths through the code.  The limit needs to be high enough
+    // to allow undoing the effects of tail merging.
+    MaxDuplicateCount = 20;
+  else if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    MaxDuplicateCount = 1;
+  else
+    MaxDuplicateCount = TailDuplicateSize;
+
+  // Check the instructions in the block to determine whether tail-duplication
+  // is invalid or unlikely to be profitable.
+  unsigned i = 0;
+  bool HasCall = false;
+  for (MachineBasicBlock::iterator I = TailBB->begin();
+       I != TailBB->end(); ++I, ++i) {
+    // Non-duplicable things shouldn't be tail-duplicated.
+    if (I->getDesc().isNotDuplicable()) return false;
+    // Don't duplicate more than the threshold.
+    if (i == MaxDuplicateCount) return false;
+    // Remember if we saw a call.
+    if (I->getDesc().isCall()) HasCall = true;
+  }
+  // Heuristically, don't tail-duplicate calls if it would expand code size,
+  // as it's less likely to be worth the extra cost.
+  if (i > 1 && HasCall)
+    return false;
+
+  // Iterate through all the unique predecessors and tail-duplicate this
+  // block into them, if possible. Copying the list ahead of time also
+  // avoids trouble with the predecessor list reallocating.
+  bool Changed = false;
+  SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+                                               TailBB->pred_end());
+  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    assert(TailBB != PredBB &&
+           "Single-block loop should have been rejected earlier!");
+    if (PredBB->succ_size() > 1) continue;
+
+    MachineBasicBlock *PredTBB, *PredFBB;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      continue;
+    if (!PredCond.empty())
+      continue;
+    // EH edges are ignored by AnalyzeBranch.
+    if (PredBB->succ_size() != 1)
+      continue;
+    // Don't duplicate into a fall-through predecessor (at least for now).
+    if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+      continue;
+
+    DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+                 << "From Succ: " << *TailBB);
+
+    // Remove PredBB's unconditional branch.
+    TII->RemoveBranch(*PredBB);
+    // Clone the contents of TailBB into PredBB.
+    for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+         I != E; ++I) {
+      MachineInstr *NewMI = MF.CloneMachineInstr(I);
+      PredBB->insert(PredBB->end(), NewMI);
+    }
+    NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
+
+    // Update the CFG.
+    PredBB->removeSuccessor(PredBB->succ_begin());
+    assert(PredBB->succ_empty() &&
+           "TailDuplicate called on block with multiple successors!");
+    for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+         E = TailBB->succ_end(); I != E; ++I)
+       PredBB->addSuccessor(*I);
+
+    Changed = true;
+    ++NumTailDups;
+  }
+
+  // If TailBB was duplicated into all its predecessors except for the prior
+  // block, which falls through unconditionally, move the contents of this
+  // block into the prior block.
+  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+  MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+  SmallVector<MachineOperand, 4> PriorCond;
+  bool PriorUnAnalyzable =
+    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+  // This has to check PrevBB.succ_size() because EH edges are ignored by
+  // AnalyzeBranch.
+  if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
+      TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+      !TailBB->hasAddressTaken()) {
+    DEBUG(errs() << "\nMerging into block: " << PrevBB
+          << "From MBB: " << *TailBB);
+    PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
+    PrevBB.removeSuccessor(PrevBB.succ_begin());;
+    assert(PrevBB.succ_empty());
+    PrevBB.transferSuccessors(TailBB);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DEBUG(errs() << "\nRemoving MBB: " << *MBB);
+
+  // Remove all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // If there are any labels in the basic block, unregister them from
+  // MachineModuleInfo.
+  if (MMI && !MBB->empty()) {
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      if (I->isLabel())
+        // The label ID # is always operand #0, an immediate.
+        MMI->InvalidateLabel(I->getOperand(0).getImm());
+    }
+  }
+
+  // Remove the block.
+  MBB->eraseFromParent();
+}
+
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index c836286f60ff6..10c806677c932 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -1600,7 +1600,7 @@ private:
                   std::vector<MachineOperand*> &KillOps) {
 
     DEBUG(errs() << "\n**** Local spiller rewriting MBB '"
-          << MBB.getBasicBlock()->getName() << "':\n");
+          << MBB.getName() << "':\n");
 
     MachineFunction &MF = *MBB.getParent();
     
-- 
cgit v1.3