645 files changed, 27962 insertions, 22623 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index e57ba7833295..71e0a832696c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeAliasSetPrinterPass(Registry);
   initializeNoAAPass(Registry);
   initializeBasicAliasAnalysisPass(Registry);
+  initializeBlockFrequencyPass(Registry);
   initializeBranchProbabilityInfoPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp
new file mode 100644
index 000000000000..4b86d1db1f04
--- /dev/null
+++ b/lib/Analysis/BlockFrequency.cpp
@@ -0,0 +1,59 @@
+//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequency.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
+                      true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
+                    true, true)
+
+char BlockFrequency::ID = 0;
+
+// Sets up pass registration and allocates BFI, which the destructor deletes.
+BlockFrequency::BlockFrequency() : FunctionPass(ID) {
+  initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
+  BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequency::~BlockFrequency() {
+  delete BFI;  // Allocated with new in the constructor.
+}
+
+void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<BranchProbabilityInfo>();  // Fetched in runOnFunction.
+  AU.setPreservesAll();  // runOnFunction reports no IR changes.
+}
+
+bool BlockFrequency::runOnFunction(Function &F) {
+  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+  BFI->doFunction(&F, &BPI);  // Hands F and BPI to the implementation.
+  return false;  // Always reports the function as unmodified.
+}
+
+/// getBlockFreq - Return the block frequency of BB. The value is never 0; it
+/// is always positive. Note that the initial frequency is 1024, so the value
+/// is only meaningful relative to other block frequencies, not on its own.
+/// Integers are used to avoid floating point arithmetic.
+///
+uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
+  return BFI->getBlockFreq(BB);
+}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 812fac0bb751..e39cd221b5a7 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/Instructions.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -25,7 +26,7 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
 
 char BranchProbabilityInfo::ID = 0;
 
-
+namespace {
 // Please note that BranchProbabilityAnalysis is not a FunctionPass.
 // It is created by BranchProbabilityInfo (which is a FunctionPass), which
 // provides a clear interface. Thanks to that, all heuristics and other
@@ -143,6 +144,7 @@ public:
 
   bool runOnFunction(Function &F);
 };
+} // end anonymous namespace
 
 // Calculate Edge Weights using "Return Heuristics". Predict a successor which
 // leads directly to Return Instruction will not be taken.
@@ -167,7 +169,7 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
 
   Value *Cond = BI->getCondition();
   ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
-  if (!CI)
+  if (!CI || !CI->isEquality())
     return;
 
   Value *LHS = CI->getOperand(0);
@@ -184,7 +186,7 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
   // p == 0   ->   isProb = false
   // p != q   ->   isProb = true
   // p == q   ->   isProb = false;
-  bool isProb = !CI->isEquality();
+  bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
   if (!isProb)
     std::swap(Taken, NonTaken);
 
@@ -256,6 +258,10 @@ bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
   return false;
 }
 
+void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequired<LoopInfo>();  // runOnFunction fetches LoopInfo.
+    AU.setPreservesAll();
+}
 
 bool BranchProbabilityInfo::runOnFunction(Function &F) {
   LoopInfo &LI = getAnalysis<LoopInfo>();
@@ -347,8 +353,8 @@ getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
 raw_ostream &
 BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
                                             BasicBlock *Dst) const {
-  BranchProbability Prob = getEdgeProbability(Src, Dst);
 
+  const BranchProbability Prob = getEdgeProbability(Src, Dst);
   OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
      << " probability is " << Prob
      << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 1a975bf4a582..ab846a26b4db 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis
   AliasSetTracker.cpp
   Analysis.cpp
   BasicAliasAnalysis.cpp
+  BlockFrequency.cpp
   BranchProbabilityInfo.cpp
   CFGPrinter.cpp
   CaptureTracking.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 08a6065b31ac..7fca17eb69f6 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -771,12 +771,12 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
     return ConstantExpr::getInsertValue(
                                 cast<Constant>(IVI->getAggregateOperand()),
                                 cast<Constant>(IVI->getInsertedValueOperand()),
-                                IVI->idx_begin(), IVI->getNumIndices());
+                                IVI->getIndices());
 
   if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
     return ConstantExpr::getExtractValue(
                                     cast<Constant>(EVI->getAggregateOperand()),
-                                    EVI->idx_begin(), EVI->getNumIndices());
+                                    EVI->getIndices());
 
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                   Ops.data(), Ops.size(), TD);
@@ -1399,7 +1399,7 @@ llvm::ConstantFoldCall(Function *F,
             ConstantInt::get(F->getContext(), Res),
             ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
           };
-          return ConstantStruct::get(F->getContext(), Ops, 2, false);
+          return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
         }
         }
       }
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ef5d03a07135..ac5eeeb4706a 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -219,7 +219,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
 }
 
 /// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(StringRef Name, 
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, 
                                    DIFile File, unsigned LineNumber, 
                                    uint64_t SizeInBits, uint64_t AlignInBits,
                                    uint64_t OffsetInBits, unsigned Flags, 
@@ -227,7 +227,7 @@ DIType DIBuilder::createMemberType(StringRef Name,
   // TAG_member is encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_member),
-    File, // Or TheCU ? Ty ?
+    Scope,
     MDString::get(VMContext, Name),
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -786,7 +786,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
   Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
-  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+  return CallInst::Create(DeclareFn, Args, "", InsertBefore);
 }
 
 /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
@@ -802,9 +802,9 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
   // If this block already has a terminator then insert this intrinsic
   // before the terminator.
   if (TerminatorInst *T = InsertAtEnd->getTerminator())
-    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+    return CallInst::Create(DeclareFn, Args, "", T);
   else
-    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+    return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
 }
 
 /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -819,7 +819,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+  return CallInst::Create(ValueFn, Args, "", InsertBefore);
 }
 
 /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -834,6 +834,6 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+  return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
 }
 
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 67f8147f4d61..b42e946f2ffa 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -727,37 +727,37 @@ void DIVariable::dump() const {
 
 /// fixupObjcLikeName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
-static void fixupObjcLikeName(std::string &Str) {
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+  bool isObjCLike = false;
   for (size_t i = 0, e = Str.size(); i < e; ++i) {
     char C = Str[i];
-    if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
-        C == '(' || C == ')')
-      Str[i] = '.';
+    if (C == '[')
+      isObjCLike = true;
+
+    if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+                       C == '+' || C == '(' || C == ')'))
+      Out.push_back('.');
+    else
+      Out.push_back(C);
   }
 }
 
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is 
 /// suitable to hold function specific information.
 NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  fixupObjcLikeName(FuncName, Name);
+
+  return M.getNamedMetadata(Name.str());
 }
 
 /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
 /// to hold function specific information.
 NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
-  SmallString<32> Out;
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
-                                      .toStringRef(Out));
-  
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
-                                    .toStringRef(Out));
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  fixupObjcLikeName(FuncName, Name);
+
+  return M.getOrInsertNamedMetadata(Name.str());
 }
 
 
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index dde25565ad81..6535786668bc 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -96,8 +96,6 @@ void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
   OS << "Types in use by this module:\n";
   for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
        E = UsedTypes.end(); I != E; ++I) {
-    OS << "   ";
-    WriteTypeSymbolic(OS, *I, M);
-    OS << '\n';
+    OS << "   " << **I << '\n';
   }
 }
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index a0c42f0cbfa5..e5f0a77ab67d 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,15 +38,6 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(IVUsers, "iv-users",
                       "Induction Variable Users", false, true)
 
-// IVUsers behavior currently depends on this temporary indvars mode. The
-// option must be defined upstream from its uses.
-namespace llvm {
-  bool DisableIVRewrite = false;
-}
-cl::opt<bool, true> DisableIVRewriteOpt(
-  "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
-  cl::desc("Disable canonical induction variable rewriting"));
-
 Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }
@@ -56,17 +46,20 @@ Pass *llvm::createIVUsersPass() {
 /// used by the given expression, within the context of analyzing the
 /// given loop.
 static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
-                          ScalarEvolution *SE) {
+                          ScalarEvolution *SE, LoopInfo *LI) {
   // An addrec is interesting if it's affine or if it has an interesting start.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
-    // Keep things simple. Don't touch loop-variant strides.
+    // Keep things simple. Don't touch loop-variant strides unless they're
+    // only used outside the loop and we can simplify them.
     if (AR->getLoop() == L)
-      return AR->isAffine() || !L->contains(I);
+      return AR->isAffine() ||
+             (!L->contains(I) &&
+              SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
     // Otherwise recurse to see if the start value is interesting, and that
     // the step value is not interesting, since we don't yet know how to
     // do effective SCEV expansions for addrecs with interesting steps.
-    return isInteresting(AR->getStart(), I, L, SE) &&
-          !isInteresting(AR->getStepRecurrence(*SE), I, L, SE);
+    return isInteresting(AR->getStart(), I, L, SE, LI) &&
+          !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
   }
 
   // An add is interesting if exactly one of its operands is interesting.
@@ -74,7 +67,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
     bool AnyInterestingYet = false;
     for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
          OI != OE; ++OI)
-      if (isInteresting(*OI, I, L, SE)) {
+      if (isInteresting(*OI, I, L, SE, LI)) {
         if (AnyInterestingYet)
           return false;
         AnyInterestingYet = true;
@@ -89,7 +82,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
 /// return true.  Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   if (!SE->isSCEVable(I->getType()))
     return false;   // Void and FP expressions cannot be reduced.
 
@@ -100,11 +93,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
   if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
     return false;
 
-  // We expect Sign/Zero extension to be eliminated from the IR before analyzing
-  // any downstream uses.
-  if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I)))
-    return false;
-
   if (!Processed.insert(I))
     return true;    // Instruction already handled.
 
@@ -113,7 +101,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
 
   // If we've come to an uninteresting expression, stop the traversal and
   // call this a user.
-  if (!isInteresting(ISE, I, L, SE))
+  if (!isInteresting(ISE, I, L, SE, LI))
     return false;
 
   SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -136,13 +124,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
       if (isa<PHINode>(User) || Processed.count(User) ||
-          !AddUsersIfInteresting(User, Phi)) {
+          !AddUsersIfInteresting(User)) {
         DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
                      << "   OF SCEV: " << *ISE << '\n');
         AddUserToIVUsers = true;
       }
-    } else if (Processed.count(User) ||
-               !AddUsersIfInteresting(User, Phi)) {
+    } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
       DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
                    << "   OF SCEV: " << *ISE << '\n');
       AddUserToIVUsers = true;
@@ -150,7 +137,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
 
     if (AddUserToIVUsers) {
       // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I, Phi));
+      IVUses.push_back(new IVStrideUse(this, User, I));
       IVStrideUse &NewUse = IVUses.back();
       // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
       // The regular return value here is discarded; instead of recording
@@ -165,8 +152,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
   return true;
 }
 
-IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) {
-  IVUses.push_back(new IVStrideUse(this, User, Operand, Phi));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+  IVUses.push_back(new IVStrideUse(this, User, Operand));
   return IVUses.back();
 }
 
@@ -194,7 +181,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   // them by stride.  Start by finding all of the PHI nodes in the header for
   // this loop.  If they are induction variables, inspect their uses.
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
-    (void)AddUsersIfInteresting(I, cast<PHINode>(I));
+    (void)AddUsersIfInteresting(I);
 
   return false;
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 9d78f8bf4044..8709f6bf9d26 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2204,15 +2204,15 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
   if (TrueVal == FalseVal)
     return TrueVal;
 
-  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
-    return FalseVal;
-  if (isa<UndefValue>(FalseVal))   // select C, X, undef -> X
-    return TrueVal;
   if (isa<UndefValue>(CondVal)) {  // select undef, X, Y -> X or Y
     if (isa<Constant>(TrueVal))
       return TrueVal;
     return FalseVal;
   }
+  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
+    return FalseVal;
+  if (isa<UndefValue>(FalseVal))   // select C, X, undef -> X
+    return TrueVal;
 
   return 0;
 }
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index f130f30c49da..89755da85097 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -592,8 +592,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
       return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
   } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
     if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
-                                     Ex->idx_begin(),
-                                     Ex->idx_end()))
+                                     Ex->getIndices()))
       if (W != V)
         return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -607,9 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
         return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
     } else if (CE->getOpcode() == Instruction::ExtractValue) {
       ArrayRef<unsigned> Indices = CE->getIndices();
-      if (Value *W = FindInsertedValue(CE->getOperand(0),
-                                       Indices.begin(),
-                                       Indices.end()))
+      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
         if (W != V)
           return findValueImpl(W, OffsetOk, Visited);
     }
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 64d215c37cc7..2283db0bc482 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
 
     MemDepResult Res = MDA.getDependency(Inst);
     if (!Res.isNonLocal()) {
-      assert(Res.isClobber() != Res.isDef() &&
-             "Local dep should be def or clobber!");
+      assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+              "Local dep should be unknown, def or clobber!");
       Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
                                                           Res.isClobber()),
                                        static_cast<BasicBlock *>(0)));
@@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
       for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
            I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
         const MemDepResult &Res = I->getResult();
-        assert(Res.isClobber() != Res.isDef() &&
-               "Resolved non-local call dep should be def or clobber!");
+        assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+                "Resolved non-local call dep should be unknown, def or "
+                "clobber!");
         InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
                                                           Res.isClobber()),
                                        I->getBB()));
@@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
       bool isClobber = I->first.getInt();
       const BasicBlock *DepBB = I->second;
 
-      OS << "    " << (isClobber ? "Clobber" : "    Def");
+      OS << "    ";
+      if (!DepInst)
+        OS << "Unknown";
+      else if (isClobber)
+        OS << "Clobber";
+      else
+        OS << "    Def";
       if (DepBB) {
         OS << " in block ";
         WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
       }
-      OS << " from: ";
-      if (DepInst == Inst)
-        OS << "<unspecified>";
-      else
-        DepInst->print(OS);
+      if (DepInst) {
+        OS << " from: ";
+        if (DepInst == Inst)
+          OS << "<unspecified>";
+        else
+          DepInst->print(OS);
+      }
       OS << "\n";
     }
 
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 769c68ce425e..53d430491198 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -50,13 +50,8 @@ static bool isMallocCall(const CallInst *CI) {
   const FunctionType *FTy = Callee->getFunctionType();
   if (FTy->getNumParams() != 1)
     return false;
-  if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
-    if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
-      return false;
-    return true;
-  }
-
-  return false;
+  return FTy->getParamType(0)->isIntegerTy(32) ||
+         FTy->getParamType(0)->isIntegerTy(64);
 }
 
 /// extractMallocCall - Returns the corresponding CallInst if the instruction
@@ -211,7 +206,7 @@ const CallInst *llvm::isFreeCall(const Value *I) {
     return 0;
   if (FTy->getNumParams() != 1)
     return 0;
-  if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+  if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
     return 0;
 
   return CI;
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 5f640c01d252..bba4482f4da5 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,6 +47,11 @@ STATISTIC(NumUncacheNonLocalPtr,
 STATISTIC(NumCacheCompleteNonLocalPtr,
           "Number of block queries that were completely cached");
 
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+//        (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
 char MemoryDependenceAnalysis::ID = 0;
   
 // Register this pass...
@@ -180,8 +185,16 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
 MemDepResult MemoryDependenceAnalysis::
 getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
                           BasicBlock::iterator ScanIt, BasicBlock *BB) {
+  unsigned Limit = BlockScanLimit;
+
   // Walk backwards through the block, looking for dependencies
   while (ScanIt != BB->begin()) {
+    // Limit the amount of scanning we do so we don't end up with quadratic
+    // running time on extreme testcases. 
+    --Limit;
+    if (!Limit)
+      return MemDepResult::getUnknown();
+
     Instruction *Inst = --ScanIt;
     
     // If this inst is a memory op, get the pointer it accessed
@@ -215,11 +228,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
     }
   }
   
-  // No dependence found.  If this is the entry block of the function, it is a
-  // clobber, otherwise it is non-local.
+  // No dependence found.  If this is the entry block of the function, it is
+  // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
     return MemDepResult::getNonLocal();
-  return MemDepResult::getClobber(ScanIt);
+  return MemDepResult::getUnknown();
 }
 
 /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -322,9 +335,17 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
 
   const Value *MemLocBase = 0;
   int64_t MemLocOffset = 0;
-  
+
+  unsigned Limit = BlockScanLimit;
+
   // Walk backwards through the basic block, looking for dependencies.
   while (ScanIt != BB->begin()) {
+    // Limit the amount of scanning we do so we don't end up with quadratic
+    // running time on extreme testcases.
+    --Limit;
+    if (!Limit)
+      return MemDepResult::getUnknown();
+
     Instruction *Inst = --ScanIt;
 
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -458,11 +479,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     }
   }
   
-  // No dependence found.  If this is the entry block of the function, it is a
-  // clobber, otherwise it is non-local.
+  // No dependence found.  If this is the entry block of the function, it is
+  // unknown, otherwise it is non-local.
   if (BB != &BB->getParent()->getEntryBlock())
     return MemDepResult::getNonLocal();
-  return MemDepResult::getClobber(ScanIt);
+  return MemDepResult::getUnknown();
 }
 
 /// getDependency - Return the instruction on which a memory operation
@@ -490,12 +511,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
   
   // Do the scan.
   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
-    // No dependence found.  If this is the entry block of the function, it is a
-    // clobber, otherwise it is non-local.
+    // No dependence found.  If this is the entry block of the function, it is
+    // unknown, otherwise it is non-local.
     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
       LocalCache = MemDepResult::getNonLocal();
     else
-      LocalCache = MemDepResult::getClobber(QueryInst);
+      LocalCache = MemDepResult::getUnknown();
   } else {
     AliasAnalysis::Location MemLoc;
     AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -514,7 +535,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
                                              QueryParent);
     } else
       // Non-memory instruction.
-      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+      LocalCache = MemDepResult::getUnknown();
   }
   
   // Remember the result!
@@ -648,10 +669,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
       Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
       // No dependence found.  If this is the entry block of the function, it is
-      // a clobber, otherwise it is non-local.
+      // unknown, otherwise it is non-local.
       Dep = MemDepResult::getNonLocal();
     } else {
-      Dep = MemDepResult::getClobber(ScanPos);
+      Dep = MemDepResult::getUnknown();
     }
     
     // If we had a dirty entry for the block, update it.  Otherwise, just add
@@ -707,7 +728,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
     return;
   Result.clear();
   Result.push_back(NonLocalDepResult(FromBB,
-                                     MemDepResult::getClobber(FromBB->begin()),
+                                     MemDepResult::getUnknown(),
                                      const_cast<Value *>(Loc.Ptr)));
 }
 
@@ -769,7 +790,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
   // to Cache!
-  if (Dep.isNonLocal())
+  if (Dep.isNonLocal() || Dep.isUnknown())
     return Dep;
   
   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
@@ -1091,16 +1112,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
 
       // If getNonLocalPointerDepFromBB fails here, that means the cached
       // result conflicted with the Visited list; we have to conservatively
-      // assume a clobber, but this also does not block PRE of the load.
+      // assume it is unknown, but this also does not block PRE of the load.
       if (!CanTranslate ||
           getNonLocalPointerDepFromBB(PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal),
                                       isLoad, Pred,
                                       Result, Visited)) {
         // Add the entry to the Result list.
-        NonLocalDepResult Entry(Pred,
-                                MemDepResult::getClobber(Pred->getTerminator()),
-                                PredPtrVal);
+        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
         Result.push_back(Entry);
 
         // Since we had a phi translation failure, the cache for CacheKey won't
@@ -1145,8 +1164,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // results from the set".  Clear out the indicator for this.
     CacheInfo->Pair = BBSkipFirstBlockPair();
     
-    // If *nothing* works, mark the pointer as being clobbered by the first
-    // instruction in this block.
+    // If *nothing* works, mark the pointer as unknown.
     //
     // If this is the magic first block, return this as a clobber of the whole
     // incoming value.  Since we can't phi translate to one of the predecessors,
@@ -1161,8 +1179,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
       
       assert(I->getResult().isNonLocal() &&
              "Should only be here with transparent block");
-      I->setResult(MemDepResult::getClobber(BB->getTerminator()));
-      ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
+      I->setResult(MemDepResult::getUnknown());
       Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
                                          Pointer.getAddr()));
       break;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 8e5a40008d88..befe6d2599d6 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,6 +19,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/STLExtras.h"
+
 using namespace llvm;
 
 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -159,7 +160,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   }
 
   // If we haven't found this binop, insert it.
-  Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
+  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"));
+  BO->setDebugLoc(SaveInsertPt->getDebugLoc());
   rememberInstruction(BO);
 
   // Restore the original insert point.
@@ -847,6 +849,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         const Type *ExpandTy,
                                         const Type *IntTy) {
+  assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
   // Reuse a previously-inserted PHI, if present.
   for (BasicBlock::iterator I = L->getHeader()->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I)
@@ -871,13 +875,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
           // If any of the operands don't dominate the insert position, bail.
           // Addrec operands are always loop-invariant, so this can only happen
           // if there are instructions which haven't been hoisted.
-          for (User::op_iterator OI = IncV->op_begin()+1,
-               OE = IncV->op_end(); OI != OE; ++OI)
-            if (Instruction *OInst = dyn_cast<Instruction>(OI))
-              if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
-                IncV = 0;
-                break;
-              }
+          if (L == IVIncInsertLoop) {
+            for (User::op_iterator OI = IncV->op_begin()+1,
+                   OE = IncV->op_end(); OI != OE; ++OI)
+              if (Instruction *OInst = dyn_cast<Instruction>(OI))
+                if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+                  IncV = 0;
+                  break;
+                }
+          }
           if (!IncV)
             break;
           // Advance to the next instruction.
@@ -919,6 +925,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
                                 L->getHeader()->begin());
 
+  // StartV must be hoisted into L's preheader to dominate the new phi.
+  assert(!isa<Instruction>(StartV) ||
+         SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
+                                  L->getHeader()));
+
   // Expand code for the step value. Insert instructions right before the
   // terminator corresponding to the back-edge. Do this before creating the PHI
   // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
@@ -935,7 +946,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   BasicBlock *Header = L->getHeader();
   Builder.SetInsertPoint(Header, Header->begin());
   pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
-  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
+  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+                                  Twine(IVName) + ".iv");
   rememberInstruction(PN);
 
   // Create the step instructions and populate the PHI.
@@ -953,7 +965,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
     // at IVIncInsertPos.
     Instruction *InsertPos = L == IVIncInsertLoop ?
       IVIncInsertPos : Pred->getTerminator();
-    Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+    Builder.SetInsertPoint(InsertPos);
     Value *IncV;
     // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
     if (isPointer) {
@@ -971,8 +983,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
       }
     } else {
       IncV = isNegative ?
-        Builder.CreateSub(PN, StepV, "lsr.iv.next") :
-        Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+        Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+        Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
       rememberInstruction(IncV);
     }
     PN->addIncoming(IncV, Pred);
@@ -1155,6 +1167,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
         Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                      "indvar.next",
                                                      HP->getTerminator());
+        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
         rememberInstruction(Add);
         CanonicalIV->addIncoming(Add, HP);
       } else {
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index dab5aebd6c64..455c91077dfb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1352,14 +1352,15 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
   // we might be able to find the complete struct somewhere.
   
   // Find the value that is at that particular spot
-  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+  Value *V = FindInsertedValue(From, Idxs);
 
   if (!V)
     return NULL;
 
   // Insert the value in the new (sub) aggregrate
-  return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
-                                       Idxs.end(), "tmp", InsertBefore);
+  return llvm::InsertValueInst::Create(To, V,
+                                       ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+                                       "tmp", InsertBefore);
 }
 
 // This helper takes a nested struct and extracts a part of it (which is again a
@@ -1374,15 +1375,13 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
 // insertvalue instruction somewhere).
 //
 // All inserted insertvalue instructions are inserted before InsertBefore
-static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
-                                const unsigned *idx_end,
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                 Instruction *InsertBefore) {
   assert(InsertBefore && "Must have someplace to insert!");
   const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
-                                                             idx_begin,
-                                                             idx_end);
+                                                             idx_range);
   Value *To = UndefValue::get(IndexedType);
-  SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
   unsigned IdxSkip = Idxs.size();
 
   return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
@@ -1394,39 +1393,37 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
 ///
 /// If InsertBefore is not null, this function will duplicate (modified)
 /// insertvalues when a part of a nested struct is extracted.
-Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
-                         const unsigned *idx_end, Instruction *InsertBefore) {
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+                               Instruction *InsertBefore) {
   // Nothing to index? Just return V then (this is useful at the end of our
   // recursion)
-  if (idx_begin == idx_end)
+  if (idx_range.empty())
     return V;
   // We have indices, so V should have an indexable type
   assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
          && "Not looking at a struct or array?");
-  assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
          && "Invalid indices for type?");
   const CompositeType *PTy = cast<CompositeType>(V->getType());
 
   if (isa<UndefValue>(V))
     return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
-                                                              idx_begin,
-                                                              idx_end));
+                                                              idx_range));
   else if (isa<ConstantAggregateZero>(V))
     return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, 
-                                                                  idx_begin,
-                                                                  idx_end));
+                                                                  idx_range));
   else if (Constant *C = dyn_cast<Constant>(V)) {
     if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
       // Recursively process this constant
-      return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
-                               idx_end, InsertBefore);
+      return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
+                               InsertBefore);
   } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
     // Loop the indices for the insertvalue instruction in parallel with the
     // requested indices
-    const unsigned *req_idx = idx_begin;
+    const unsigned *req_idx = idx_range.begin();
     for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
          i != e; ++i, ++req_idx) {
-      if (req_idx == idx_end) {
+      if (req_idx == idx_range.end()) {
         if (InsertBefore)
           // The requested index identifies a part of a nested aggregate. Handle
           // this specially. For example,
@@ -1438,7 +1435,10 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
           // %C = insertvalue {i32, i32 } %A, i32 11, 1
           // which allows the unused 0,0 element from the nested struct to be
           // removed.
-          return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+          return BuildSubAggregate(V,
+                                   ArrayRef<unsigned>(idx_range.begin(),
+                                                      req_idx),
+                                   InsertBefore);
         else
           // We can't handle this without inserting insertvalues
           return 0;
@@ -1448,13 +1448,14 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
       // See if the (aggregrate) value inserted into has the value we are
       // looking for, then.
       if (*req_idx != *i)
-        return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+        return FindInsertedValue(I->getAggregateOperand(), idx_range,
                                  InsertBefore);
     }
     // If we end up here, the indices of the insertvalue match with those
     // requested (though possibly only partially). Now we recursively look at
     // the inserted value, passing any remaining indices.
-    return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+    return FindInsertedValue(I->getInsertedValueOperand(),
+                             ArrayRef<unsigned>(req_idx, idx_range.end()),
                              InsertBefore);
   } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
     // If we're extracting a value from an aggregrate that was extracted from
@@ -1462,24 +1463,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
     // However, we will need to chain I's indices with the requested indices.
    
     // Calculate the number of indices required 
-    unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+    unsigned size = I->getNumIndices() + idx_range.size();
     // Allocate some space to put the new indices in
     SmallVector<unsigned, 5> Idxs;
     Idxs.reserve(size);
     // Add indices from the extract value instruction
-    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
-         i != e; ++i)
-      Idxs.push_back(*i);
+    Idxs.append(I->idx_begin(), I->idx_end());
     
     // Add requested indices
-    for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
-      Idxs.push_back(*i);
+    Idxs.append(idx_range.begin(), idx_range.end());
 
     assert(Idxs.size() == size 
            && "Number of indices added not correct?");
     
-    return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
-                             InsertBefore);
+    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
   }
   // Otherwise, we don't know (such as, extracting from a function return value
   // or load instruction)
@@ -1783,3 +1780,19 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
   }
   return V;
 }
+
+/// onlyUsedByLifetimeMarkers - Return true if every user of V is a call to
+/// the lifetime_start or lifetime_end intrinsic.  A value with no users at
+/// all also returns true, because the loop body never runs.
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+    if (!II) return false;  // This user is not an IntrinsicInst.
+
+    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+        II->getIntrinsicID() != Intrinsic::lifetime_end)
+      return false;  // Some other intrinsic uses V.
+  }
+  return true;
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 014e81602d64..3c63106e8c3b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -406,29 +406,20 @@ lltok::Kind LLLexer::LexQuote() {
   return kind;
 }
 
-static bool JustWhitespaceNewLine(const char *&Ptr) {
-  const char *ThisPtr = Ptr;
-  while (*ThisPtr == ' ' || *ThisPtr == '\t')
-    ++ThisPtr;
-  if (*ThisPtr == '\n' || *ThisPtr == '\r') {
-    Ptr = ThisPtr;
-    return true;
-  }
-  return false;
-}
-
 /// LexExclaim:
 ///    !foo
 ///    !
 lltok::Kind LLLexer::LexExclaim() {
   // Lex a metadata name as a MetadataVar.
-  if (isalpha(CurPtr[0])) {
+  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
     ++CurPtr;
     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
-           CurPtr[0] == '.' || CurPtr[0] == '_')
+           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
       ++CurPtr;
 
     StrVal.assign(TokStart+1, CurPtr);   // Skip !
+    UnEscapeLexed(StrVal);
     return lltok::MetadataVar;
   }
   return lltok::exclaim;
@@ -480,7 +471,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
     return lltok::kw_##STR;
 
-  KEYWORD(begin);   KEYWORD(end);
   KEYWORD(true);    KEYWORD(false);
   KEYWORD(declare); KEYWORD(define);
   KEYWORD(global);  KEYWORD(constant);
@@ -570,6 +560,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(noimplicitfloat);
   KEYWORD(naked);
   KEYWORD(hotpatch);
+  KEYWORD(nonlazybind);
 
   KEYWORD(type);
   KEYWORD(opaque);
@@ -598,26 +589,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
 #undef TYPEKEYWORD
 
-  // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
-  // to avoid conflicting with the sext/zext instructions, below.
-  if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
-    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
-    if (JustWhitespaceNewLine(CurPtr))
-      return lltok::kw_signext;
-  } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
-    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
-    if (JustWhitespaceNewLine(CurPtr))
-      return lltok::kw_zeroext;
-  } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) {
-    // FIXME: Remove in LLVM 3.0.
-    // Autoupgrade malloc instruction.
-    return lltok::kw_malloc;
-  } else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
-    // FIXME: Remove in LLVM 3.0.
-    // Autoupgrade malloc instruction.
-    return lltok::kw_free;
-  }
-
   // Keywords for instructions.
 #define INSTKEYWORD(STR, Enum) \
   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
@@ -664,7 +635,6 @@ lltok::Kind LLLexer::LexIdentifier() {
   INSTKEYWORD(extractelement, ExtractElement);
   INSTKEYWORD(insertelement,  InsertElement);
   INSTKEYWORD(shufflevector,  ShuffleVector);
-  INSTKEYWORD(getresult,      ExtractValue);
   INSTKEYWORD(extractvalue,   ExtractValue);
   INSTKEYWORD(insertvalue,    InsertValue);
 #undef INSTKEYWORD
@@ -689,14 +659,6 @@ lltok::Kind LLLexer::LexIdentifier() {
     return lltok::kw_cc;
   }
 
-  // If this starts with "call", return it as CALL.  This is to support old
-  // broken .ll files.  FIXME: remove this with LLVM 3.0.
-  if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
-    CurPtr = TokStart+4;
-    UIntVal = Instruction::Call;
-    return lltok::kw_call;
-  }
-
   // Finally, if this isn't known, return an error.
   CurPtr = TokStart+1;
   return lltok::Error;
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 4fe705e1a5b5..33b913572375 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -38,7 +38,7 @@ namespace llvm {
     lltok::Kind CurKind;
     std::string StrVal;
     unsigned UIntVal;
-    const Type *TyVal;
+    Type *TyVal;
     APFloat APFloatVal;
     APSInt  APSIntVal;
 
@@ -56,7 +56,7 @@ namespace llvm {
     LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
     lltok::Kind getKind() const { return CurKind; }
     const std::string &getStrVal() const { return StrVal; }
-    const Type *getTyVal() const { return TyVal; }
+    Type *getTyVal() const { return TyVal; }
     unsigned getUIntVal() const { return UIntVal; }
     const APSInt &getAPSIntVal() const { return APSIntVal; }
     const APFloat &getAPFloatVal() const { return APFloatVal; }
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 81e0747266f1..cfc31f3db8a7 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -26,6 +26,13 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+static std::string getTypeString(const Type *T) {  // Print *T to a string.
+  std::string Result;
+  raw_string_ostream Tmp(Result);  // Stream constructed over Result.
+  Tmp << *T;
+  return Tmp.str();
+}
+
 /// Run: module ::= toplevelentity*
 bool LLParser::Run() {
   // Prime the lexer.
@@ -59,24 +66,6 @@ bool LLParser::ValidateEndOfModule() {
   }
   
   
-  // Update auto-upgraded malloc calls to "malloc".
-  // FIXME: Remove in LLVM 3.0.
-  if (MallocF) {
-    MallocF->setName("malloc");
-    // If setName() does not set the name to "malloc", then there is already a 
-    // declaration of "malloc".  In that case, iterate over all calls to MallocF
-    // and get them to call the declared "malloc" instead.
-    if (MallocF->getName() != "malloc") {
-      Constant *RealMallocF = M->getFunction("malloc");
-      if (RealMallocF->getType() != MallocF->getType())
-        RealMallocF = ConstantExpr::getBitCast(RealMallocF, MallocF->getType());
-      MallocF->replaceAllUsesWith(RealMallocF);
-      MallocF->eraseFromParent();
-      MallocF = NULL;
-    }
-  }
-  
-  
   // If there are entries in ForwardRefBlockAddresses at this point, they are
   // references after the function was defined.  Resolve those now.
   while (!ForwardRefBlockAddresses.empty()) {
@@ -100,15 +89,16 @@ bool LLParser::ValidateEndOfModule() {
     ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
   }
   
-  
-  if (!ForwardRefTypes.empty())
-    return Error(ForwardRefTypes.begin()->second.second,
-                 "use of undefined type named '" +
-                 ForwardRefTypes.begin()->first + "'");
-  if (!ForwardRefTypeIDs.empty())
-    return Error(ForwardRefTypeIDs.begin()->second.second,
-                 "use of undefined type '%" +
-                 Twine(ForwardRefTypeIDs.begin()->first) + "'");
+  for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
+    if (NumberedTypes[i].second.isValid())
+      return Error(NumberedTypes[i].second,
+                   "use of undefined type '%" + Twine(i) + "'");
+
+  for (StringMap<std::pair<Type*, LocTy> >::iterator I =
+       NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
+    if (I->second.second.isValid())
+      return Error(I->second.second,
+                   "use of undefined type named '" + I->getKey() + "'");
 
   if (!ForwardRefVals.empty())
     return Error(ForwardRefVals.begin()->second.second,
@@ -176,15 +166,12 @@ bool LLParser::ParseTopLevelEntities() {
     switch (Lex.getKind()) {
     default:         return TokError("expected top-level entity");
     case lltok::Eof: return false;
-    //case lltok::kw_define:
     case lltok::kw_declare: if (ParseDeclare()) return true; break;
     case lltok::kw_define:  if (ParseDefine()) return true; break;
     case lltok::kw_module:  if (ParseModuleAsm()) return true; break;
     case lltok::kw_target:  if (ParseTargetDefinition()) return true; break;
     case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
-    case lltok::kw_type:    if (ParseUnnamedType()) return true; break;
     case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
-    case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
     case lltok::LocalVar:   if (ParseNamedType()) return true; break;
     case lltok::GlobalID:   if (ParseUnnamedGlobal()) return true; break;
     case lltok::GlobalVar:  if (ParseNamedGlobal()) return true; break;
@@ -304,45 +291,35 @@ bool LLParser::ParseDepLibs() {
 }
 
 /// ParseUnnamedType:
-///   ::= 'type' type
 ///   ::= LocalVarID '=' 'type' type
 bool LLParser::ParseUnnamedType() {
-  unsigned TypeID = NumberedTypes.size();
-
-  // Handle the LocalVarID form.
-  if (Lex.getKind() == lltok::LocalVarID) {
-    if (Lex.getUIntVal() != TypeID)
-      return Error(Lex.getLoc(), "type expected to be numbered '%" +
-                   Twine(TypeID) + "'");
-    Lex.Lex(); // eat LocalVarID;
-
-    if (ParseToken(lltok::equal, "expected '=' after name"))
-      return true;
-  }
-
   LocTy TypeLoc = Lex.getLoc();
-  if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true;
+  unsigned TypeID = Lex.getUIntVal();  // Numeric id of the current token.
+  Lex.Lex(); // eat LocalVarID;
 
-  PATypeHolder Ty(Type::getVoidTy(Context));
-  if (ParseType(Ty)) return true;
-
-  // See if this type was previously referenced.
-  std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
-    FI = ForwardRefTypeIDs.find(TypeID);
-  if (FI != ForwardRefTypeIDs.end()) {
-    if (FI->second.first.get() == Ty)
-      return Error(TypeLoc, "self referential type is invalid");
+  if (ParseToken(lltok::equal, "expected '=' after name") ||
+      ParseToken(lltok::kw_type, "expected 'type' after '='"))
+    return true;
 
-    cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
-    Ty = FI->second.first.get();
-    ForwardRefTypeIDs.erase(FI);
+  if (TypeID >= NumberedTypes.size())  // Grow the table so TypeID is a slot.
+    NumberedTypes.resize(TypeID+1);  // NOTE(review): wraps if TypeID is ~0U.
+  
+  Type *Result = 0;
+  if (ParseStructDefinition(TypeLoc, "",
+                            NumberedTypes[TypeID], Result)) return true;
+  
+  if (!isa<StructType>(Result)) {  // Non-struct: store the type in the slot.
+    std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
+    if (Entry.first)  // Slot already holds a type.
+      return Error(TypeLoc, "non-struct types may not be recursive");
+    Entry.first = Result;
+    Entry.second = SMLoc();  // Presumably an invalid loc -- TODO confirm.
   }
 
-  NumberedTypes.push_back(Ty);
-
   return false;
 }
 
+
 /// toplevelentity
 ///   ::= LocalVar '=' 'type' type
 bool LLParser::ParseNamedType() {
@@ -350,42 +327,23 @@ bool LLParser::ParseNamedType() {
   LocTy NameLoc = Lex.getLoc();
   Lex.Lex();  // eat LocalVar.
 
-  PATypeHolder Ty(Type::getVoidTy(Context));
-
   if (ParseToken(lltok::equal, "expected '=' after name") ||
-      ParseToken(lltok::kw_type, "expected 'type' after name") ||
-      ParseType(Ty))
+      ParseToken(lltok::kw_type, "expected 'type' after name"))
     return true;
-
-  // Set the type name, checking for conflicts as we do so.
-  bool AlreadyExists = M->addTypeName(Name, Ty);
-  if (!AlreadyExists) return false;
-
-  // See if this type is a forward reference.  We need to eagerly resolve
-  // types to allow recursive type redefinitions below.
-  std::map<std::string, std::pair<PATypeHolder, LocTy> >::iterator
-  FI = ForwardRefTypes.find(Name);
-  if (FI != ForwardRefTypes.end()) {
-    if (FI->second.first.get() == Ty)
-      return Error(NameLoc, "self referential type is invalid");
-
-    cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
-    Ty = FI->second.first.get();
-    ForwardRefTypes.erase(FI);
+  
+  Type *Result = 0;
+  if (ParseStructDefinition(NameLoc, Name,
+                            NamedTypes[Name], Result)) return true;
+  
+  if (!isa<StructType>(Result)) {
+    std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
+    if (Entry.first)
+      return Error(NameLoc, "non-struct types may not be recursive");
+    Entry.first = Result;
+    Entry.second = SMLoc();
   }
-
-  // Inserting a name that is already defined, get the existing name.
-  const Type *Existing = M->getTypeByName(Name);
-  assert(Existing && "Conflict but no matching type?!");
-
-  // Otherwise, this is an attempt to redefine a type. That's okay if
-  // the redefinition is identical to the original.
-  // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
-  if (Existing == Ty) return false;
-
-  // Any other kind of (non-equivalent) redefinition is an error.
-  return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" +
-               Ty->getDescription() + "'");
+  
+  return false;
 }
 
 
@@ -561,7 +519,7 @@ bool LLParser::ParseStandaloneMetadata() {
   unsigned MetadataID = 0;
 
   LocTy TyLoc;
-  PATypeHolder Ty(Type::getVoidTy(Context));
+  Type *Ty = 0;
   SmallVector<Value *, 16> Elts;
   if (ParseUInt32(MetadataID) ||
       ParseToken(lltok::equal, "expected '=' here") ||
@@ -693,7 +651,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
   LocTy UnnamedAddrLoc;
   LocTy TyLoc;
 
-  PATypeHolder Ty(Type::getVoidTy(Context));
+  Type *Ty = 0;
   if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
       ParseOptionalAddrSpace(AddrSpace) ||
       ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -811,24 +769,17 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
   if (Val) {
     if (Val->getType() == Ty) return Val;
     Error(Loc, "'@" + Name + "' defined with type '" +
-          Val->getType()->getDescription() + "'");
+          getTypeString(Val->getType()) + "'");
     return 0;
   }
 
   // Otherwise, create a new forward reference for this value and remember it.
   GlobalValue *FwdVal;
-  if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
-    // Function types can return opaque but functions can't.
-    if (FT->getReturnType()->isOpaqueTy()) {
-      Error(Loc, "function may not return opaque type");
-      return 0;
-    }
-
+  if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
     FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
-  } else {
+  else
     FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
                                 GlobalValue::ExternalWeakLinkage, 0, Name);
-  }
 
   ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
   return FwdVal;
@@ -856,23 +807,17 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
   if (Val) {
     if (Val->getType() == Ty) return Val;
     Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
-          Val->getType()->getDescription() + "'");
+          getTypeString(Val->getType()) + "'");
     return 0;
   }
 
   // Otherwise, create a new forward reference for this value and remember it.
   GlobalValue *FwdVal;
-  if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
-    // Function types can return opaque but functions can't.
-    if (FT->getReturnType()->isOpaqueTy()) {
-      Error(Loc, "function may not return opaque type");
-      return 0;
-    }
+  if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
     FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
-  } else {
+  else
     FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
                                 GlobalValue::ExternalWeakLinkage, 0, "");
-  }
 
   ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
   return FwdVal;
@@ -931,33 +876,23 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
 /// ParseOptionalAttrs - Parse a potentially empty attribute list.  AttrKind
 /// indicates what kind of attribute list this is: 0: function arg, 1: result,
 /// 2: function attr.
-/// 3: function arg after value: FIXME: REMOVE IN LLVM 3.0
 bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
   Attrs = Attribute::None;
   LocTy AttrLoc = Lex.getLoc();
 
   while (1) {
     switch (Lex.getKind()) {
-    case lltok::kw_sext:
-    case lltok::kw_zext:
-      // Treat these as signext/zeroext if they occur in the argument list after
-      // the value, as in "call i8 @foo(i8 10 sext)".  If they occur before the
-      // value, as in "call i8 @foo(i8 sext (" then it is part of a constant
-      // expr.
-      // FIXME: REMOVE THIS IN LLVM 3.0
-      if (AttrKind == 3) {
-        if (Lex.getKind() == lltok::kw_sext)
-          Attrs |= Attribute::SExt;
-        else
-          Attrs |= Attribute::ZExt;
-        break;
-      }
-      // FALL THROUGH.
     default:  // End of attributes.
       if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
         return Error(AttrLoc, "invalid use of function-only attribute");
 
-      if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly))
+      // As a hack, we allow "align 2" on functions as a synonym for
+      // "alignstack 2".
+      if (AttrKind == 2 &&
+          (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
+        return Error(AttrLoc, "invalid use of attribute on a function");
+
+      if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
         return Error(AttrLoc, "invalid use of parameter-only attribute");
 
       return false;
@@ -985,6 +920,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
     case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
     case lltok::kw_naked:           Attrs |= Attribute::Naked; break;
     case lltok::kw_hotpatch:        Attrs |= Attribute::Hotpatch; break;
+    case lltok::kw_nonlazybind:     Attrs |= Attribute::NonLazyBind; break;
 
     case lltok::kw_alignstack: {
       unsigned Alignment;
@@ -1262,166 +1198,68 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
 // Type Parsing.
 //===----------------------------------------------------------------------===//
 
-/// ParseType - Parse and resolve a full type.
-bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) {
-  LocTy TypeLoc = Lex.getLoc();
-  if (ParseTypeRec(Result)) return true;
-
-  // Verify no unresolved uprefs.
-  if (!UpRefs.empty())
-    return Error(UpRefs.back().Loc, "invalid unresolved type up reference");
-
-  if (!AllowVoid && Result.get()->isVoidTy())
-    return Error(TypeLoc, "void type only allowed for function results");
-
-  return false;
-}
-
-/// HandleUpRefs - Every time we finish a new layer of types, this function is
-/// called.  It loops through the UpRefs vector, which is a list of the
-/// currently active types.  For each type, if the up-reference is contained in
-/// the newly completed type, we decrement the level count.  When the level
-/// count reaches zero, the up-referenced type is the type that is passed in:
-/// thus we can complete the cycle.
-///
-PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
-  // If Ty isn't abstract, or if there are no up-references in it, then there is
-  // nothing to resolve here.
-  if (!ty->isAbstract() || UpRefs.empty()) return ty;
-
-  PATypeHolder Ty(ty);
-#if 0
-  dbgs() << "Type '" << Ty->getDescription()
-         << "' newly formed.  Resolving upreferences.\n"
-         << UpRefs.size() << " upreferences active!\n";
-#endif
-
-  // If we find any resolvable upreferences (i.e., those whose NestingLevel goes
-  // to zero), we resolve them all together before we resolve them to Ty.  At
-  // the end of the loop, if there is anything to resolve to Ty, it will be in
-  // this variable.
-  OpaqueType *TypeToResolve = 0;
-
-  for (unsigned i = 0; i != UpRefs.size(); ++i) {
-    // Determine if 'Ty' directly contains this up-references 'LastContainedTy'.
-    bool ContainsType =
-      std::find(Ty->subtype_begin(), Ty->subtype_end(),
-                UpRefs[i].LastContainedTy) != Ty->subtype_end();
-
-#if 0
-    dbgs() << "  UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
-           << UpRefs[i].LastContainedTy->getDescription() << ") = "
-           << (ContainsType ? "true" : "false")
-           << " level=" << UpRefs[i].NestingLevel << "\n";
-#endif
-    if (!ContainsType)
-      continue;
-
-    // Decrement level of upreference
-    unsigned Level = --UpRefs[i].NestingLevel;
-    UpRefs[i].LastContainedTy = Ty;
-
-    // If the Up-reference has a non-zero level, it shouldn't be resolved yet.
-    if (Level != 0)
-      continue;
-
-#if 0
-    dbgs() << "  * Resolving upreference for " << UpRefs[i].UpRefTy << "\n";
-#endif
-    if (!TypeToResolve)
-      TypeToResolve = UpRefs[i].UpRefTy;
-    else
-      UpRefs[i].UpRefTy->refineAbstractTypeTo(TypeToResolve);
-    UpRefs.erase(UpRefs.begin()+i);     // Remove from upreference list.
-    --i;                                // Do not skip the next element.
-  }
-
-  if (TypeToResolve)
-    TypeToResolve->refineAbstractTypeTo(Ty);
-
-  return Ty;
-}
-
-
-/// ParseTypeRec - The recursive function used to process the internal
-/// implementation details of types.
-bool LLParser::ParseTypeRec(PATypeHolder &Result) {
+/// ParseType - Parse a type.
+bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
+  SMLoc TypeLoc = Lex.getLoc();
   switch (Lex.getKind()) {
   default:
     return TokError("expected type");
   case lltok::Type:
-    // TypeRec ::= 'float' | 'void' (etc)
+    // Type ::= 'float' | 'void' (etc)
     Result = Lex.getTyVal();
     Lex.Lex();
     break;
-  case lltok::kw_opaque:
-    // TypeRec ::= 'opaque'
-    Result = OpaqueType::get(Context);
-    Lex.Lex();
-    break;
   case lltok::lbrace:
-    // TypeRec ::= '{' ... '}'
-    if (ParseStructType(Result, false))
+    // Type ::= StructType
+    if (ParseAnonStructType(Result, false))
       return true;
     break;
   case lltok::lsquare:
-    // TypeRec ::= '[' ... ']'
+    // Type ::= '[' ... ']'
     Lex.Lex(); // eat the lsquare.
     if (ParseArrayVectorType(Result, false))
       return true;
     break;
   case lltok::less: // Either vector or packed struct.
-    // TypeRec ::= '<' ... '>'
+    // Type ::= '<' ... '>'
     Lex.Lex();
     if (Lex.getKind() == lltok::lbrace) {
-      if (ParseStructType(Result, true) ||
+      if (ParseAnonStructType(Result, true) ||
           ParseToken(lltok::greater, "expected '>' at end of packed struct"))
         return true;
     } else if (ParseArrayVectorType(Result, true))
       return true;
     break;
-  case lltok::LocalVar:
-  case lltok::StringConstant:  // FIXME: REMOVE IN LLVM 3.0
-    // TypeRec ::= %foo
-    if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
-      Result = T;
-    } else {
-      Result = OpaqueType::get(Context);
-      ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(),
-                                            std::make_pair(Result,
-                                                           Lex.getLoc())));
-      M->addTypeName(Lex.getStrVal(), Result.get());
+  case lltok::LocalVar: {
+    // Type ::= %foo
+    std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
+    
+    // If the type hasn't been defined yet, create a forward definition and
+    // remember where that forward def'n was seen (in case it never is defined).
+    if (Entry.first == 0) {
+      Entry.first = StructType::createNamed(Context, Lex.getStrVal());
+      Entry.second = Lex.getLoc();
     }
+    Result = Entry.first;
     Lex.Lex();
     break;
+  }
 
-  case lltok::LocalVarID:
-    // TypeRec ::= %4
-    if (Lex.getUIntVal() < NumberedTypes.size())
-      Result = NumberedTypes[Lex.getUIntVal()];
-    else {
-      std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
-        I = ForwardRefTypeIDs.find(Lex.getUIntVal());
-      if (I != ForwardRefTypeIDs.end())
-        Result = I->second.first;
-      else {
-        Result = OpaqueType::get(Context);
-        ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(),
-                                                std::make_pair(Result,
-                                                               Lex.getLoc())));
-      }
+  case lltok::LocalVarID: {
+    // Type ::= %4
+    if (Lex.getUIntVal() >= NumberedTypes.size())
+      NumberedTypes.resize(Lex.getUIntVal()+1);
+    std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
+    
+    // If the type hasn't been defined yet, create a forward definition and
+    // remember where that forward def'n was seen (in case it never is defined).
+    if (Entry.first == 0) {
+      Entry.first = StructType::createNamed(Context, "");
+      Entry.second = Lex.getLoc();
     }
+    Result = Entry.first;
     Lex.Lex();
     break;
-  case lltok::backslash: {
-    // TypeRec ::= '\' 4
-    Lex.Lex();
-    unsigned Val;
-    if (ParseUInt32(Val)) return true;
-    OpaqueType *OT = OpaqueType::get(Context); //Use temporary placeholder.
-    UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT));
-    Result = OT;
-    break;
   }
   }
 
@@ -1429,34 +1267,37 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
   while (1) {
     switch (Lex.getKind()) {
     // End of type.
-    default: return false;
+    default:
+      if (!AllowVoid && Result->isVoidTy())
+        return Error(TypeLoc, "void type only allowed for function results");
+      return false;
 
-    // TypeRec ::= TypeRec '*'
+    // Type ::= Type '*'
     case lltok::star:
-      if (Result.get()->isLabelTy())
+      if (Result->isLabelTy())
         return TokError("basic block pointers are invalid");
-      if (Result.get()->isVoidTy())
-        return TokError("pointers to void are invalid; use i8* instead");
-      if (!PointerType::isValidElementType(Result.get()))
+      if (Result->isVoidTy())
+        return TokError("pointers to void are invalid - use i8* instead");
+      if (!PointerType::isValidElementType(Result))
         return TokError("pointer to this type is invalid");
-      Result = HandleUpRefs(PointerType::getUnqual(Result.get()));
+      Result = PointerType::getUnqual(Result);
       Lex.Lex();
       break;
 
-    // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*'
+    // Type ::= Type 'addrspace' '(' uint32 ')' '*'
     case lltok::kw_addrspace: {
-      if (Result.get()->isLabelTy())
+      if (Result->isLabelTy())
         return TokError("basic block pointers are invalid");
-      if (Result.get()->isVoidTy())
+      if (Result->isVoidTy())
         return TokError("pointers to void are invalid; use i8* instead");
-      if (!PointerType::isValidElementType(Result.get()))
+      if (!PointerType::isValidElementType(Result))
         return TokError("pointer to this type is invalid");
       unsigned AddrSpace;
       if (ParseOptionalAddrSpace(AddrSpace) ||
           ParseToken(lltok::star, "expected '*' in address space"))
         return true;
 
-      Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace));
+      Result = PointerType::get(Result, AddrSpace);
       break;
     }
 
@@ -1487,7 +1328,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
 
     // Parse the argument.
     LocTy ArgLoc;
-    PATypeHolder ArgTy(Type::getVoidTy(Context));
+    Type *ArgTy = 0;
     unsigned ArgAttrs1 = Attribute::None;
     unsigned ArgAttrs2 = Attribute::None;
     Value *V;
@@ -1495,11 +1336,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
       return true;
 
     // Otherwise, handle normal operands.
-    if (ParseOptionalAttrs(ArgAttrs1, 0) ||
-        ParseValue(ArgTy, V, PFS) ||
-        // FIXME: Should not allow attributes after the argument, remove this
-        // in LLVM 3.0.
-        ParseOptionalAttrs(ArgAttrs2, 3))
+    if (ParseOptionalAttrs(ArgAttrs1, 0) || ParseValue(ArgTy, V, PFS))
       return true;
     ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
   }
@@ -1511,7 +1348,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
 
 
 /// ParseArgumentList - Parse the argument list for a function type or function
-/// prototype.  If 'inType' is true then we are parsing a FunctionType.
+/// prototype.
 ///   ::= '(' ArgTypeListI ')'
 /// ArgTypeListI
 ///   ::= /*empty*/
@@ -1519,8 +1356,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
 ///   ::= ArgTypeList ',' '...'
 ///   ::= ArgType (',' ArgType)*
 ///
-bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
-                                 bool &isVarArg, bool inType) {
+bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
+                                 bool &isVarArg){
   isVarArg = false;
   assert(Lex.getKind() == lltok::lparen);
   Lex.Lex(); // eat the (.
@@ -1532,21 +1369,17 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
     Lex.Lex();
   } else {
     LocTy TypeLoc = Lex.getLoc();
-    PATypeHolder ArgTy(Type::getVoidTy(Context));
+    Type *ArgTy = 0;
     unsigned Attrs;
     std::string Name;
 
-    // If we're parsing a type, use ParseTypeRec, because we allow recursive
-    // types (such as a function returning a pointer to itself).  If parsing a
-    // function prototype, we require fully resolved types.
-    if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+    if (ParseType(ArgTy) ||
         ParseOptionalAttrs(Attrs, 0)) return true;
 
     if (ArgTy->isVoidTy())
       return Error(TypeLoc, "argument can not have void type");
 
-    if (Lex.getKind() == lltok::LocalVar ||
-        Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+    if (Lex.getKind() == lltok::LocalVar) {
       Name = Lex.getStrVal();
       Lex.Lex();
     }
@@ -1565,21 +1398,19 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
 
       // Otherwise must be an argument type.
       TypeLoc = Lex.getLoc();
-      if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
-          ParseOptionalAttrs(Attrs, 0)) return true;
+      if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true;
 
       if (ArgTy->isVoidTy())
         return Error(TypeLoc, "argument can not have void type");
 
-      if (Lex.getKind() == lltok::LocalVar ||
-          Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+      if (Lex.getKind() == lltok::LocalVar) {
         Name = Lex.getStrVal();
         Lex.Lex();
       } else {
         Name = "";
       }
 
-      if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy())
+      if (!ArgTy->isFirstClassType())
         return Error(TypeLoc, "invalid type for function argument");
 
       ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
@@ -1591,94 +1422,142 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
 
 /// ParseFunctionType
 ///  ::= Type ArgumentList OptionalAttrs
-bool LLParser::ParseFunctionType(PATypeHolder &Result) {
+bool LLParser::ParseFunctionType(Type *&Result) {
   assert(Lex.getKind() == lltok::lparen);
 
   if (!FunctionType::isValidReturnType(Result))
     return TokError("invalid function return type");
 
-  std::vector<ArgInfo> ArgList;
+  SmallVector<ArgInfo, 8> ArgList;
   bool isVarArg;
-  unsigned Attrs;
-  if (ParseArgumentList(ArgList, isVarArg, true) ||
-      // FIXME: Allow, but ignore attributes on function types!
-      // FIXME: Remove in LLVM 3.0
-      ParseOptionalAttrs(Attrs, 2))
+  if (ParseArgumentList(ArgList, isVarArg))
     return true;
 
   // Reject names on the arguments lists.
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
     if (!ArgList[i].Name.empty())
       return Error(ArgList[i].Loc, "argument name invalid in function type");
-    if (!ArgList[i].Attrs != 0) {
-      // Allow but ignore attributes on function types; this permits
-      // auto-upgrade.
-      // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
-    }
+    if (ArgList[i].Attrs != 0)
+      return Error(ArgList[i].Loc,
+                   "argument attributes invalid in function type");
   }
 
-  std::vector<const Type*> ArgListTy;
+  SmallVector<Type*, 16> ArgListTy;
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
-    ArgListTy.push_back(ArgList[i].Type);
+    ArgListTy.push_back(ArgList[i].Ty);
+
+  Result = FunctionType::get(Result, ArgListTy, isVarArg);
+  return false;
+}
 
-  Result = HandleUpRefs(FunctionType::get(Result.get(),
-                                                ArgListTy, isVarArg));
+/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into
+/// other structs.
+bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
+  SmallVector<Type*, 8> Elts;
+  if (ParseStructBody(Elts)) return true;
+  
+  Result = StructType::get(Context, Elts, Packed);
   return false;
 }
 
+/// ParseStructDefinition - Parse a struct in a 'type' definition.
+bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+                                     std::pair<Type*, LocTy> &Entry,
+                                     Type *&ResultTy) {
+  // If the type was already defined, diagnose the redefinition.
+  if (Entry.first && !Entry.second.isValid())
+    return Error(TypeLoc, "redefinition of type");
+  
+  // If we have opaque, just return without filling in the definition for the
+  // struct.  This counts as a definition as far as the .ll file goes.
+  if (EatIfPresent(lltok::kw_opaque)) {
+    // This type is being defined, so clear the location to indicate this.
+    Entry.second = SMLoc();
+    
+    // If this type number has never been uttered, create it.
+    if (Entry.first == 0)
+      Entry.first = StructType::createNamed(Context, Name);
+    ResultTy = Entry.first;
+    return false;
+  }
+  
+  // If the type starts with '<', then it is either a packed struct or a vector.
+  bool isPacked = EatIfPresent(lltok::less);
+
+  // If we don't have a struct, then we have a random type alias, which we
+  // accept for compatibility with old files.  These types are not allowed to be
+  // forward referenced and not allowed to be recursive.
+  if (Lex.getKind() != lltok::lbrace) {
+    if (Entry.first)
+      return Error(TypeLoc, "forward references to non-struct type");
+  
+    ResultTy = 0;
+    if (isPacked)
+      return ParseArrayVectorType(ResultTy, true);
+    return ParseType(ResultTy);
+  }
+                               
+  // This type is being defined, so clear the location to indicate this.
+  Entry.second = SMLoc();
+  
+  // If this type number has never been uttered, create it.
+  if (Entry.first == 0)
+    Entry.first = StructType::createNamed(Context, Name);
+  
+  StructType *STy = cast<StructType>(Entry.first);
+ 
+  SmallVector<Type*, 8> Body;
+  if (ParseStructBody(Body) ||
+      (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
+    return true;
+  
+  STy->setBody(Body, isPacked);
+  ResultTy = STy;
+  return false;
+}
+
+
 /// ParseStructType: Handles packed and unpacked types.  </> parsed elsewhere.
-///   TypeRec
+///   StructType
 ///     ::= '{' '}'
-///     ::= '{' TypeRec (',' TypeRec)* '}'
+///     ::= '{' Type (',' Type)* '}'
 ///     ::= '<' '{' '}' '>'
-///     ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
-bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
+///     ::= '<' '{' Type (',' Type)* '}' '>'
+bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
   assert(Lex.getKind() == lltok::lbrace);
   Lex.Lex(); // Consume the '{'
 
-  if (EatIfPresent(lltok::rbrace)) {
-    Result = StructType::get(Context, Packed);
+  // Handle the empty struct.
+  if (EatIfPresent(lltok::rbrace))
     return false;
-  }
 
-  std::vector<PATypeHolder> ParamsList;
   LocTy EltTyLoc = Lex.getLoc();
-  if (ParseTypeRec(Result)) return true;
-  ParamsList.push_back(Result);
+  Type *Ty = 0;
+  if (ParseType(Ty)) return true;
+  Body.push_back(Ty);
 
-  if (Result->isVoidTy())
-    return Error(EltTyLoc, "struct element can not have void type");
-  if (!StructType::isValidElementType(Result))
+  if (!StructType::isValidElementType(Ty))
     return Error(EltTyLoc, "invalid element type for struct");
 
   while (EatIfPresent(lltok::comma)) {
     EltTyLoc = Lex.getLoc();
-    if (ParseTypeRec(Result)) return true;
+    if (ParseType(Ty)) return true;
 
-    if (Result->isVoidTy())
-      return Error(EltTyLoc, "struct element can not have void type");
-    if (!StructType::isValidElementType(Result))
+    if (!StructType::isValidElementType(Ty))
       return Error(EltTyLoc, "invalid element type for struct");
 
-    ParamsList.push_back(Result);
+    Body.push_back(Ty);
   }
 
-  if (ParseToken(lltok::rbrace, "expected '}' at end of struct"))
-    return true;
-
-  std::vector<const Type*> ParamsListTy;
-  for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
-    ParamsListTy.push_back(ParamsList[i].get());
-  Result = HandleUpRefs(StructType::get(Context, ParamsListTy, Packed));
-  return false;
+  return ParseToken(lltok::rbrace, "expected '}' at end of struct");
 }
 
 /// ParseArrayVectorType - Parse an array or vector type, assuming the first
 /// token has already been consumed.
-///   TypeRec
+///   Type
 ///     ::= '[' APSINTVAL 'x' Types ']'
 ///     ::= '<' APSINTVAL 'x' Types '>'
-bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
+bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
   if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
       Lex.getAPSIntVal().getBitWidth() > 64)
     return TokError("expected number in address space");
@@ -1691,11 +1570,8 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
       return true;
 
   LocTy TypeLoc = Lex.getLoc();
-  PATypeHolder EltTy(Type::getVoidTy(Context));
-  if (ParseTypeRec(EltTy)) return true;
-
-  if (EltTy->isVoidTy())
-    return Error(TypeLoc, "array and vector element type cannot be void");
+  Type *EltTy = 0;
+  if (ParseType(EltTy)) return true;
 
   if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
                  "expected end of sequential type"))
@@ -1712,7 +1588,7 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
   } else {
     if (!ArrayType::isValidElementType(EltTy))
       return Error(TypeLoc, "invalid array element type");
-    Result = HandleUpRefs(ArrayType::get(EltTy, Size));
+    Result = ArrayType::get(EltTy, Size);
   }
   return false;
 }
@@ -1812,12 +1688,12 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
       P.Error(Loc, "'%" + Name + "' is not a basic block");
     else
       P.Error(Loc, "'%" + Name + "' defined with type '" +
-              Val->getType()->getDescription() + "'");
+              getTypeString(Val->getType()) + "'");
     return 0;
   }
 
   // Don't make placeholders with invalid type.
-  if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
+  if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
     P.Error(Loc, "invalid use of a non-first-class type");
     return 0;
   }
@@ -1854,11 +1730,11 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
       P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
     else
       P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
-              Val->getType()->getDescription() + "'");
+              getTypeString(Val->getType()) + "'");
     return 0;
   }
 
-  if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
+  if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
     P.Error(Loc, "invalid use of a non-first-class type");
     return 0;
   }
@@ -1902,7 +1778,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
     if (FI != ForwardRefValIDs.end()) {
       if (FI->second.first->getType() != Inst->getType())
         return P.Error(NameLoc, "instruction forward referenced with type '" +
-                       FI->second.first->getType()->getDescription() + "'");
+                       getTypeString(FI->second.first->getType()) + "'");
       FI->second.first->replaceAllUsesWith(Inst);
       delete FI->second.first;
       ForwardRefValIDs.erase(FI);
@@ -1918,7 +1794,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
   if (FI != ForwardRefVals.end()) {
     if (FI->second.first->getType() != Inst->getType())
       return P.Error(NameLoc, "instruction forward referenced with type '" +
-                     FI->second.first->getType()->getDescription() + "'");
+                     getTypeString(FI->second.first->getType()) + "'");
     FI->second.first->replaceAllUsesWith(Inst);
     delete FI->second.first;
     ForwardRefVals.erase(FI);
@@ -2001,7 +1877,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
     ID.Kind = ValID::t_LocalID;
     break;
   case lltok::LocalVar:  // %foo
-  case lltok::StringConstant:  // "foo" - FIXME: REMOVE IN LLVM 3.0
     ID.StrVal = Lex.getStrVal();
     ID.Kind = ValID::t_LocalName;
     break;
@@ -2035,9 +1910,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
         ParseToken(lltok::rbrace, "expected end of struct constant"))
       return true;
 
-    ID.ConstantVal = ConstantStruct::get(Context, Elts.data(),
-                                         Elts.size(), false);
-    ID.Kind = ValID::t_Constant;
+    ID.ConstantStructElts = new Constant*[Elts.size()];
+    ID.UIntVal = Elts.size();
+    memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+    ID.Kind = ValID::t_ConstantStruct;
     return false;
   }
   case lltok::less: {
@@ -2055,9 +1931,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       return true;
 
     if (isPackedStruct) {
-      ID.ConstantVal =
-        ConstantStruct::get(Context, Elts.data(), Elts.size(), true);
-      ID.Kind = ValID::t_Constant;
+      ID.ConstantStructElts = new Constant*[Elts.size()];
+      memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+      ID.UIntVal = Elts.size();
+      ID.Kind = ValID::t_PackedConstantStruct;
       return false;
     }
 
@@ -2074,7 +1951,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
                      "vector element #" + Twine(i) +
-                    " is not of type '" + Elts[0]->getType()->getDescription());
+                    " is not of type '" + getTypeString(Elts[0]->getType()));
 
     ID.ConstantVal = ConstantVector::get(Elts);
     ID.Kind = ValID::t_Constant;
@@ -2098,7 +1975,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
 
     if (!Elts[0]->getType()->isFirstClassType())
       return Error(FirstEltLoc, "invalid array element type: " +
-                   Elts[0]->getType()->getDescription());
+                   getTypeString(Elts[0]->getType()));
 
     ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
 
@@ -2107,10 +1984,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
                      "array element #" + Twine(i) +
-                     " is not of type '" +Elts[0]->getType()->getDescription());
+                     " is not of type '" + getTypeString(Elts[0]->getType()));
     }
 
-    ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
+    ID.ConstantVal = ConstantArray::get(ATy, Elts);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -2179,7 +2056,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
   case lltok::kw_inttoptr:
   case lltok::kw_ptrtoint: {
     unsigned Opc = Lex.getUIntVal();
-    PATypeHolder DestTy(Type::getVoidTy(Context));
+    Type *DestTy = 0;
     Constant *SrcVal;
     Lex.Lex();
     if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
@@ -2190,8 +2067,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       return true;
     if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
       return Error(ID.Loc, "invalid cast opcode for cast from '" +
-                   SrcVal->getType()->getDescription() + "' to '" +
-                   DestTy->getDescription() + "'");
+                   getTypeString(SrcVal->getType()) + "' to '" +
+                   getTypeString(DestTy) + "'");
     ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
                                                  SrcVal, DestTy);
     ID.Kind = ValID::t_Constant;
@@ -2209,11 +2086,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
 
     if (!Val->getType()->isAggregateType())
       return Error(ID.Loc, "extractvalue operand must be aggregate type");
-    if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
-                                          Indices.end()))
+    if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
       return Error(ID.Loc, "invalid indices for extractvalue");
-    ID.ConstantVal =
-      ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
+    ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -2230,11 +2105,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
       return true;
     if (!Val0->getType()->isAggregateType())
       return Error(ID.Loc, "insertvalue operand must be aggregate type");
-    if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
-                                          Indices.end()))
+    if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
       return Error(ID.Loc, "invalid indices for insertvalue");
-    ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1,
-                       Indices.data(), Indices.size());
+    ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -2462,9 +2335,9 @@ bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
 }
 
 bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
-  PATypeHolder Type(Type::getVoidTy(Context));
-  return ParseType(Type) ||
-         ParseGlobalValue(Type, V);
+  Type *Ty = 0;
+  return ParseType(Ty) ||
+         ParseGlobalValue(Ty, V);
 }
 
 /// ParseGlobalValueVector
@@ -2600,7 +2473,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
 
     if (V->getType() != Ty)
       return Error(ID.Loc, "floating point constant does not have type '" +
-                   Ty->getDescription() + "'");
+                   getTypeString(Ty) + "'");
 
     return false;
   case ValID::t_Null:
@@ -2610,8 +2483,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
     return false;
   case ValID::t_Undef:
     // FIXME: LabelTy should not be a first-class type.
-    if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
-        !Ty->isOpaqueTy())
+    if (!Ty->isFirstClassType() || Ty->isLabelTy())
       return Error(ID.Loc, "invalid type for undef constant");
     V = UndefValue::get(Ty);
     return false;
@@ -2632,20 +2504,40 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
 
     V = ID.ConstantVal;
     return false;
+  case ValID::t_ConstantStruct:
+  case ValID::t_PackedConstantStruct:
+    if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+      if (ST->getNumElements() != ID.UIntVal)
+        return Error(ID.Loc,
+                     "initializer with struct type has wrong # elements");
+      if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
+        return Error(ID.Loc, "packed'ness of initializer and type don't match");
+        
+      // Verify that the elements are compatible with the structtype.
+      for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
+        if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
+          return Error(ID.Loc, "element " + Twine(i) +
+                    " of struct initializer doesn't match struct element type");
+      
+      V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts,
+                                                      ID.UIntVal));
+    } else
+      return Error(ID.Loc, "constant expression type mismatch");
+    return false;
   }
 }
 
-bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) {
   V = 0;
   ValID ID;
-  return ParseValID(ID, &PFS) ||
-         ConvertValIDToValue(Ty, ID, V, &PFS);
+  return ParseValID(ID, PFS) ||
+         ConvertValIDToValue(Ty, ID, V, PFS);
 }
 
-bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
-  PATypeHolder T(Type::getVoidTy(Context));
-  return ParseType(T) ||
-         ParseValue(T, V, PFS);
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
+  Type *Ty = 0;
+  return ParseType(Ty) ||
+         ParseValue(Ty, V, PFS);
 }
 
 bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
@@ -2671,7 +2563,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
   unsigned Visibility, RetAttrs;
   CallingConv::ID CC;
-  PATypeHolder RetType(Type::getVoidTy(Context));
+  Type *RetType = 0;
   LocTy RetTypeLoc = Lex.getLoc();
   if (ParseOptionalLinkage(Linkage) ||
       ParseOptionalVisibility(Visibility) ||
@@ -2708,8 +2600,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
     return Error(LinkageLoc, "invalid function linkage type");
   }
 
-  if (!FunctionType::isValidReturnType(RetType) ||
-      RetType->isOpaqueTy())
+  if (!FunctionType::isValidReturnType(RetType))
     return Error(RetTypeLoc, "invalid function return type");
 
   LocTy NameLoc = Lex.getLoc();
@@ -2732,7 +2623,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   if (Lex.getKind() != lltok::lparen)
     return TokError("expected '(' in function argument list");
 
-  std::vector<ArgInfo> ArgList;
+  SmallVector<ArgInfo, 8> ArgList;
   bool isVarArg;
   unsigned FuncAttrs;
   std::string Section;
@@ -2741,7 +2632,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   bool UnnamedAddr;
   LocTy UnnamedAddrLoc;
 
-  if (ParseArgumentList(ArgList, isVarArg, false) ||
+  if (ParseArgumentList(ArgList, isVarArg) ||
       ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
                          &UnnamedAddrLoc) ||
       ParseOptionalAttrs(FuncAttrs, 2) ||
@@ -2760,21 +2651,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
   // Okay, if we got here, the function is syntactically valid.  Convert types
   // and do semantic checks.
-  std::vector<const Type*> ParamTypeList;
+  std::vector<Type*> ParamTypeList;
   SmallVector<AttributeWithIndex, 8> Attrs;
-  // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
-  // attributes.
-  unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
-  if (FuncAttrs & ObsoleteFuncAttrs) {
-    RetAttrs |= FuncAttrs & ObsoleteFuncAttrs;
-    FuncAttrs &= ~ObsoleteFuncAttrs;
-  }
 
   if (RetAttrs != Attribute::None)
     Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
 
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
-    ParamTypeList.push_back(ArgList[i].Type);
+    ParamTypeList.push_back(ArgList[i].Ty);
     if (ArgList[i].Attrs != Attribute::None)
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
   }
@@ -2805,21 +2689,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
       
       ForwardRefVals.erase(FRVI);
     } else if ((Fn = M->getFunction(FunctionName))) {
-      // If this function already exists in the symbol table, then it is
-      // multiply defined.  We accept a few cases for old backwards compat.
-      // FIXME: Remove this stuff for LLVM 3.0.
-      if (Fn->getType() != PFT || Fn->getAttributes() != PAL ||
-          (!Fn->isDeclaration() && isDefine)) {
-        // If the redefinition has different type or different attributes,
-        // reject it.  If both have bodies, reject it.
-        return Error(NameLoc, "invalid redefinition of function '" +
-                     FunctionName + "'");
-      } else if (Fn->isDeclaration()) {
-        // Make sure to strip off any argument names so we can't get conflicts.
-        for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
-             AI != AE; ++AI)
-          AI->setName("");
-      }
+      // Reject redefinitions.
+      return Error(NameLoc, "invalid redefinition of function '" +
+                   FunctionName + "'");
     } else if (M->getNamedValue(FunctionName)) {
       return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
     }
@@ -2858,10 +2730,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   // Add all of the arguments we parsed to the function.
   Function::arg_iterator ArgIt = Fn->arg_begin();
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
-    // If we run out of arguments in the Function prototype, exit early.
-    // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above.
-    if (ArgIt == Fn->arg_end()) break;
-    
     // If the argument has a name, insert it into the argument symbol table.
     if (ArgList[i].Name.empty()) continue;
 
@@ -2879,10 +2747,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
 /// ParseFunctionBody
 ///   ::= '{' BasicBlock+ '}'
-///   ::= 'begin' BasicBlock+ 'end'  // FIXME: remove in LLVM 3.0
 ///
 bool LLParser::ParseFunctionBody(Function &Fn) {
-  if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
+  if (Lex.getKind() != lltok::lbrace)
     return TokError("expected '{' in function body");
   Lex.Lex();  // eat the {.
 
@@ -2892,10 +2759,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
   PerFunctionState PFS(*this, Fn, FunctionNumber);
 
   // We need at least one basic block.
-  if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end)
+  if (Lex.getKind() == lltok::rbrace)
     return TokError("function body requires at least one basic block");
   
-  while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
+  while (Lex.getKind() != lltok::rbrace)
     if (ParseBasicBlock(PFS)) return true;
 
   // Eat the }.
@@ -2936,9 +2803,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
       Lex.Lex();
       if (ParseToken(lltok::equal, "expected '=' after instruction id"))
         return true;
-    } else if (Lex.getKind() == lltok::LocalVar ||
-               // FIXME: REMOVE IN LLVM 3.0
-               Lex.getKind() == lltok::StringConstant) {
+    } else if (Lex.getKind() == lltok::LocalVar) {
       NameStr = Lex.getStrVal();
       Lex.Lex();
       if (ParseToken(lltok::equal, "expected '=' after instruction name"))
@@ -3062,8 +2927,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_tail:           return ParseCall(Inst, PFS, true);
   // Memory.
   case lltok::kw_alloca:         return ParseAlloc(Inst, PFS);
-  case lltok::kw_malloc:         return ParseAlloc(Inst, PFS, BB, false);
-  case lltok::kw_free:           return ParseFree(Inst, PFS, BB);
   case lltok::kw_load:           return ParseLoad(Inst, PFS, false);
   case lltok::kw_store:          return ParseStore(Inst, PFS, false);
   case lltok::kw_volatile:
@@ -3073,7 +2936,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
       return ParseStore(Inst, PFS, true);
     else
       return TokError("expected 'load' or 'store'");
-  case lltok::kw_getresult:     return ParseGetResult(Inst, PFS);
   case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
   case lltok::kw_extractvalue:  return ParseExtractValue(Inst, PFS);
   case lltok::kw_insertvalue:   return ParseInsertValue(Inst, PFS);
@@ -3128,14 +2990,19 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
 /// ParseRet - Parse a return instruction.
 ///   ::= 'ret' void (',' !dbg, !1)*
 ///   ::= 'ret' TypeAndValue (',' !dbg, !1)*
-///   ::= 'ret' TypeAndValue (',' TypeAndValue)+  (',' !dbg, !1)*
-///         [[obsolete: LLVM 3.0]]
-int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
-                       PerFunctionState &PFS) {
-  PATypeHolder Ty(Type::getVoidTy(Context));
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+                        PerFunctionState &PFS) {
+  SMLoc TypeLoc = Lex.getLoc();
+  Type *Ty = 0;
   if (ParseType(Ty, true /*void allowed*/)) return true;
 
+  Type *ResType = PFS.getFunction().getReturnType();
+  
   if (Ty->isVoidTy()) {
+    if (!ResType->isVoidTy())
+      return Error(TypeLoc, "value doesn't match function result type '" +
+                   getTypeString(ResType) + "'");
+    
     Inst = ReturnInst::Create(Context);
     return false;
   }
@@ -3143,38 +3010,12 @@ int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
   Value *RV;
   if (ParseValue(Ty, RV, PFS)) return true;
 
-  bool ExtraComma = false;
-  if (EatIfPresent(lltok::comma)) {
-    // Parse optional custom metadata, e.g. !dbg
-    if (Lex.getKind() == lltok::MetadataVar) {
-      ExtraComma = true;
-    } else {
-      // The normal case is one return value.
-      // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring
-      // use of 'ret {i32,i32} {i32 1, i32 2}'
-      SmallVector<Value*, 8> RVs;
-      RVs.push_back(RV);
-
-      do {
-        // If optional custom metadata, e.g. !dbg is seen then this is the 
-        // end of MRV.
-        if (Lex.getKind() == lltok::MetadataVar)
-          break;
-        if (ParseTypeAndValue(RV, PFS)) return true;
-        RVs.push_back(RV);
-      } while (EatIfPresent(lltok::comma));
-
-      RV = UndefValue::get(PFS.getFunction().getReturnType());
-      for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
-        Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
-        BB->getInstList().push_back(I);
-        RV = I;
-      }
-    }
-  }
-
+  if (ResType != RV->getType())
+    return Error(TypeLoc, "value doesn't match function result type '" +
+                 getTypeString(ResType) + "'");
+  
   Inst = ReturnInst::Create(Context, RV);
-  return ExtraComma ? InstExtraComma : InstNormal;
+  return false;
 }
 
 
@@ -3300,7 +3141,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   LocTy CallLoc = Lex.getLoc();
   unsigned RetAttrs, FnAttrs;
   CallingConv::ID CC;
-  PATypeHolder RetType(Type::getVoidTy(Context));
+  Type *RetType = 0;
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
@@ -3326,7 +3167,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
       !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
     // Pull out the types of all of the arguments...
-    std::vector<const Type*> ParamTypes;
+    std::vector<Type*> ParamTypes;
     for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
       ParamTypes.push_back(ArgList[i].V->getType());
 
@@ -3341,14 +3182,6 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   Value *Callee;
   if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
 
-  // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
-  // function attributes.
-  unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
-  if (FnAttrs & ObsoleteFuncAttrs) {
-    RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
-    FnAttrs &= ~ObsoleteFuncAttrs;
-  }
-
   // Set up the Attributes for the function.
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs != Attribute::None)
@@ -3370,7 +3203,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
 
     if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
-                   ExpectedTy->getDescription() + "'");
+                   getTypeString(ExpectedTy) + "'");
     Args.push_back(ArgList[i].V);
     if (ArgList[i].Attrs != Attribute::None)
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
@@ -3385,8 +3218,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   // Finish off the Attributes and check them
   AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
 
-  InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB,
-                                      Args.begin(), Args.end());
+  InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
   II->setCallingConv(CC);
   II->setAttributes(PAL);
   Inst = II;
@@ -3486,8 +3318,9 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
 ///   ::= CastOpc TypeAndValue 'to' Type
 bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
                          unsigned Opc) {
-  LocTy Loc;  Value *Op;
-  PATypeHolder DestTy(Type::getVoidTy(Context));
+  LocTy Loc;
+  Value *Op;
+  Type *DestTy = 0;
   if (ParseTypeAndValue(Op, Loc, PFS) ||
       ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
       ParseType(DestTy))
@@ -3496,8 +3329,8 @@ bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
   if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
     CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
     return Error(Loc, "invalid cast opcode for cast from '" +
-                 Op->getType()->getDescription() + "' to '" +
-                 DestTy->getDescription() + "'");
+                 getTypeString(Op->getType()) + "' to '" +
+                 getTypeString(DestTy) + "'");
   }
   Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
   return false;
@@ -3526,7 +3359,7 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
 ///   ::= 'va_arg' TypeAndValue ',' Type
 bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
   Value *Op;
-  PATypeHolder EltTy(Type::getVoidTy(Context));
+  Type *EltTy = 0;
   LocTy TypeLoc;
   if (ParseTypeAndValue(Op, PFS) ||
       ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
@@ -3598,11 +3431,10 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
 /// ParsePHI
 ///   ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
 int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
-  PATypeHolder Ty(Type::getVoidTy(Context));
+  Type *Ty = 0;  LocTy TypeLoc;
   Value *Op0, *Op1;
-  LocTy TypeLoc = Lex.getLoc();
 
-  if (ParseType(Ty) ||
+  if (ParseType(Ty, TypeLoc) ||
       ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
       ParseValue(Ty, Op0, PFS) ||
       ParseToken(lltok::comma, "expected ',' after insertelement value") ||
@@ -3648,7 +3480,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
                          bool isTail) {
   unsigned RetAttrs, FnAttrs;
   CallingConv::ID CC;
-  PATypeHolder RetType(Type::getVoidTy(Context));
+  Type *RetType = 0;
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
@@ -3671,7 +3503,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
       !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
     // Pull out the types of all of the arguments...
-    std::vector<const Type*> ParamTypes;
+    std::vector<Type*> ParamTypes;
     for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
       ParamTypes.push_back(ArgList[i].V->getType());
 
@@ -3686,14 +3518,6 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   Value *Callee;
   if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
 
-  // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
-  // function attributes.
-  unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
-  if (FnAttrs & ObsoleteFuncAttrs) {
-    RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
-    FnAttrs &= ~ObsoleteFuncAttrs;
-  }
-
   // Set up the Attributes for the function.
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs != Attribute::None)
@@ -3715,7 +3539,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 
     if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
-                   ExpectedTy->getDescription() + "'");
+                   getTypeString(ExpectedTy) + "'");
     Args.push_back(ArgList[i].V);
     if (ArgList[i].Attrs != Attribute::None)
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
@@ -3730,7 +3554,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   // Finish off the Attributes and check them
   AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
 
-  CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end());
+  CallInst *CI = CallInst::Create(Callee, Args);
   CI->setTailCall(isTail);
   CI->setCallingConv(CC);
   CI->setAttributes(PAL);
@@ -3743,14 +3567,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 //===----------------------------------------------------------------------===//
 
 /// ParseAlloc
-///   ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)?
 ///   ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
-int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
-                         BasicBlock* BB, bool isAlloca) {
-  PATypeHolder Ty(Type::getVoidTy(Context));
+int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
   Value *Size = 0;
   LocTy SizeLoc;
   unsigned Alignment = 0;
+  Type *Ty = 0;
   if (ParseType(Ty)) return true;
 
   bool AteExtraComma = false;
@@ -3769,37 +3591,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
   if (Size && !Size->getType()->isIntegerTy())
     return Error(SizeLoc, "element count must have integer type");
 
-  if (isAlloca) {
-    Inst = new AllocaInst(Ty, Size, Alignment);
-    return AteExtraComma ? InstExtraComma : InstNormal;
-  }
-
-  // Autoupgrade old malloc instruction to malloc call.
-  // FIXME: Remove in LLVM 3.0.
-  if (Size && !Size->getType()->isIntegerTy(32))
-    return Error(SizeLoc, "element count must be i32");
-  const Type *IntPtrTy = Type::getInt32Ty(Context);
-  Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
-  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
-  if (!MallocF)
-    // Prototype malloc as "void *(int32)".
-    // This function is renamed as "malloc" in ValidateEndOfModule().
-    MallocF = cast<Function>(
-       M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL));
-  Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF);
-return AteExtraComma ? InstExtraComma : InstNormal;
-}
-
-/// ParseFree
-///   ::= 'free' TypeAndValue
-bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS,
-                         BasicBlock* BB) {
-  Value *Val; LocTy Loc;
-  if (ParseTypeAndValue(Val, Loc, PFS)) return true;
-  if (!Val->getType()->isPointerTy())
-    return Error(Loc, "operand to free must be a pointer");
-  Inst = CallInst::CreateFree(Val, BB);
-  return false;
+  Inst = new AllocaInst(Ty, Size, Alignment);
+  return AteExtraComma ? InstExtraComma : InstNormal;
 }
 
 /// ParseLoad
@@ -3845,25 +3638,6 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
   return AteExtraComma ? InstExtraComma : InstNormal;
 }
 
-/// ParseGetResult
-///   ::= 'getresult' TypeAndValue ',' i32
-/// FIXME: Remove support for getresult in LLVM 3.0
-bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
-  Value *Val; LocTy ValLoc, EltLoc;
-  unsigned Element;
-  if (ParseTypeAndValue(Val, ValLoc, PFS) ||
-      ParseToken(lltok::comma, "expected ',' after getresult operand") ||
-      ParseUInt32(Element, EltLoc))
-    return true;
-
-  if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy())
-    return Error(ValLoc, "getresult inst requires an aggregate operand");
-  if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
-    return Error(EltLoc, "invalid getresult index for value");
-  Inst = ExtractValueInst::Create(Val, Element);
-  return false;
-}
-
 /// ParseGetElementPtr
 ///   ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
 int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
@@ -3911,10 +3685,9 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
   if (!Val->getType()->isAggregateType())
     return Error(Loc, "extractvalue operand must be aggregate type");
 
-  if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
-                                        Indices.end()))
+  if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
     return Error(Loc, "invalid indices for extractvalue");
-  Inst = ExtractValueInst::Create(Val, Indices.begin(), Indices.end());
+  Inst = ExtractValueInst::Create(Val, Indices);
   return AteExtraComma ? InstExtraComma : InstNormal;
 }
 
@@ -3933,10 +3706,9 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
   if (!Val0->getType()->isAggregateType())
     return Error(Loc0, "insertvalue operand must be aggregate type");
 
-  if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
-                                        Indices.end()))
+  if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
     return Error(Loc0, "invalid indices for insertvalue");
-  Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end());
+  Inst = InsertValueInst::Create(Val0, Val1, Indices);
   return AteExtraComma ? InstExtraComma : InstNormal;
 }
 
@@ -3962,12 +3734,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
     }
     
     Value *V = 0;
-    PATypeHolder Ty(Type::getVoidTy(Context));
-    ValID ID;
-    if (ParseType(Ty) || ParseValID(ID, PFS) ||
-        ConvertValIDToValue(Ty, ID, V, PFS))
-      return true;
-    
+    if (ParseTypeAndValue(V, PFS)) return true;
     Elts.push_back(V);
   } while (EatIfPresent(lltok::comma));
 
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 93e7f778ebcb..963065785061 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -18,6 +18,7 @@
 #include "llvm/Module.h"
 #include "llvm/Type.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/ValueHandle.h"
 #include <map>
 
@@ -32,6 +33,7 @@ namespace llvm {
   class GlobalValue;
   class MDString;
   class MDNode;
+  class StructType;
 
   /// ValID - Represents a reference of a definition of some sort with no type.
   /// There are several cases where we have to parse the value but where the
@@ -47,7 +49,9 @@ namespace llvm {
       t_Constant,                 // Value in ConstantVal.
       t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
       t_MDNode,                   // Value in MDNodeVal.
-      t_MDString                  // Value in MDStringVal.
+      t_MDString,                 // Value in MDStringVal.
+      t_ConstantStruct,           // Value in ConstantStructElts.
+      t_PackedConstantStruct      // Value in ConstantStructElts.
     } Kind;
     
     LLLexer::LocTy Loc;
@@ -58,12 +62,19 @@ namespace llvm {
     Constant *ConstantVal;
     MDNode *MDNodeVal;
     MDString *MDStringVal;
-    ValID() : APFloatVal(0.0) {}
+    Constant **ConstantStructElts;
+    
+    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
+    ~ValID() {
+      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
+        delete [] ConstantStructElts;
+    }
     
     bool operator<(const ValID &RHS) const {
       if (Kind == t_LocalID || Kind == t_GlobalID)
         return UIntVal < RHS.UIntVal;
-      assert((Kind == t_LocalName || Kind == t_GlobalName) && 
+      assert((Kind == t_LocalName || Kind == t_GlobalName ||
+              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) && 
              "Ordering not defined for this ValID kind yet");
       return StrVal < RHS.StrVal;
     }
@@ -93,33 +104,13 @@ namespace llvm {
     };
     DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
 
-    // Type resolution handling data structures.
-    std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
-    std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
-    std::vector<PATypeHolder> NumberedTypes;
+    // Type resolution handling data structures.  The location is set when we
+    // have processed a use of the type but not a definition yet.
+    StringMap<std::pair<Type*, LocTy> > NamedTypes;
+    std::vector<std::pair<Type*, LocTy> > NumberedTypes;
+    
     std::vector<TrackingVH<MDNode> > NumberedMetadata;
     std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
-    struct UpRefRecord {
-      /// Loc - This is the location of the upref.
-      LocTy Loc;
-
-      /// NestingLevel - The number of nesting levels that need to be popped
-      /// before this type is resolved.
-      unsigned NestingLevel;
-
-      /// LastContainedTy - This is the type at the current binding level for
-      /// the type.  Every time we reduce the nesting level, this gets updated.
-      const Type *LastContainedTy;
-
-      /// UpRefTy - This is the actual opaque type that the upreference is
-      /// represented with.
-      OpaqueType *UpRefTy;
-
-      UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy)
-        : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy),
-          UpRefTy(URTy) {}
-    };
-    std::vector<UpRefRecord> UpRefs;
 
     // Global Value reference information.
     std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
@@ -131,14 +122,13 @@ namespace llvm {
     std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
       ForwardRefBlockAddresses;
     
-    Function *MallocF;
   public:
     LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : 
       Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
-      M(m), MallocF(NULL) {}
+      M(m) {}
     bool Run();
 
-    LLVMContext& getContext() { return Context; }
+    LLVMContext &getContext() { return Context; }
 
   private:
 
@@ -223,16 +213,19 @@ namespace llvm {
     bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
 
     // Type Parsing.
-    bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
-    bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) {
+    bool ParseType(Type *&Result, bool AllowVoid = false);
+    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
       Loc = Lex.getLoc();
       return ParseType(Result, AllowVoid);
     }
-    bool ParseTypeRec(PATypeHolder &H);
-    bool ParseStructType(PATypeHolder &H, bool Packed);
-    bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
-    bool ParseFunctionType(PATypeHolder &Result);
-    PATypeHolder HandleUpRefs(const Type *Ty);
+    bool ParseAnonStructType(Type *&Result, bool Packed);
+    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
+    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+                               std::pair<Type*, LocTy> &Entry,
+                               Type *&ResultTy);
+
+    bool ParseArrayVectorType(Type *&Result, bool isVector);
+    bool ParseFunctionType(Type *&Result);
 
     // Function Semantic Analysis.
     class PerFunctionState {
@@ -279,14 +272,20 @@ namespace llvm {
     bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
                              PerFunctionState *PFS);
 
-    bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
+    bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS);
+    bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+      return ParseValue(Ty, V, &PFS);
+    }
     bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
                     PerFunctionState &PFS) {
       Loc = Lex.getLoc();
-      return ParseValue(Ty, V, PFS);
+      return ParseValue(Ty, V, &PFS);
     }
 
-    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS);
+    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
+    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+      return ParseTypeAndValue(V, &PFS);
+    }
     bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
       Loc = Lex.getLoc();
       return ParseTypeAndValue(V, PFS);
@@ -322,14 +321,13 @@ namespace llvm {
     // Function Parsing.
     struct ArgInfo {
       LocTy Loc;
-      PATypeHolder Type;
+      Type *Ty;
       unsigned Attrs;
       std::string Name;
-      ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N)
-        : Loc(L), Type(Ty), Attrs(Attr), Name(N) {}
+      ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
+        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
     };
-    bool ParseArgumentList(std::vector<ArgInfo> &ArgList,
-                           bool &isVarArg, bool inType);
+    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
     bool ParseFunctionHeader(Function *&Fn, bool isDefine);
     bool ParseFunctionBody(Function &Fn);
     bool ParseBasicBlock(PerFunctionState &PFS);
@@ -341,7 +339,7 @@ namespace llvm {
                          PerFunctionState &PFS);
     bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
 
-    int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
     bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
@@ -359,12 +357,9 @@ namespace llvm {
     bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
     int ParsePHI(Instruction *&I, PerFunctionState &PFS);
     bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
-    int ParseAlloc(Instruction *&I, PerFunctionState &PFS,
-                    BasicBlock *BB = 0, bool isAlloca = true);
-    bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB);
+    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
     int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
     int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
-    bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
     int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
     int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
     int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 02f97a3d3d23..a5f89fcce0c0 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -32,7 +32,6 @@ namespace lltok {
     exclaim,           // !
 
     kw_x,
-    kw_begin,   kw_end,
     kw_true,    kw_false,
     kw_declare, kw_define,
     kw_global,  kw_constant,
@@ -99,6 +98,7 @@ namespace lltok {
     kw_noimplicitfloat,
     kw_naked,
     kw_hotpatch,
+    kw_nonlazybind,
 
     kw_type,
     kw_opaque,
@@ -121,9 +121,9 @@ namespace lltok {
     kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind,
     kw_unreachable,
 
-    kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr,
+    kw_alloca, kw_load, kw_store, kw_getelementptr,
 
-    kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult,
+    kw_extractelement, kw_insertelement, kw_shufflevector,
     kw_extractvalue, kw_insertvalue, kw_blockaddress,
 
     // Unsigned Valued tokens (UIntVal).
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index bc995aec83ee..24c29941cf16 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -31,7 +31,7 @@ void BitcodeReader::FreeState() {
   if (BufferOwned)
     delete Buffer;
   Buffer = 0;
-  std::vector<PATypeHolder>().swap(TypeList);
+  std::vector<Type*>().swap(TypeList);
   ValueList.clear();
   MDValueList.clear();
 
@@ -292,11 +292,9 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
       // Make the new constant.
       Constant *NewC;
       if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
-        NewC = ConstantArray::get(UserCA->getType(), &NewOps[0],
-                                        NewOps.size());
+        NewC = ConstantArray::get(UserCA->getType(), NewOps);
       } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
-        NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
-                                         UserCS->getType()->isPacked());
+        NewC = ConstantStruct::get(UserCS->getType(), NewOps);
       } else if (isa<ConstantVector>(UserC)) {
         NewC = ConstantVector::get(NewOps);
       } else {
@@ -354,19 +352,28 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
   return V;
 }
 
-const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) {
-  // If the TypeID is in range, return it.
-  if (ID < TypeList.size())
-    return TypeList[ID].get();
-  if (!isTypeTable) return 0;
-
-  // The type table allows forward references.  Push as many Opaque types as
-  // needed to get up to ID.
-  while (TypeList.size() <= ID)
-    TypeList.push_back(OpaqueType::get(Context));
-  return TypeList.back().get();
+Type *BitcodeReader::getTypeByID(unsigned ID) {
+  // The type table size is always specified correctly; out-of-range is null.
+  if (ID >= TypeList.size())
+    return 0;
+  // A slot that has already been filled in is returned as-is.
+  if (Type *Ty = TypeList[ID])
+    return Ty;
+
+  // If we have a forward reference, the only possible case is when it is to a
+  // named struct.  Create a placeholder for now and record it in the slot.
+  return TypeList[ID] = StructType::createNamed(Context, "");
 }
 
+/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
+Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
+  if (ID >= TypeList.size())
+    TypeList.resize(ID+1);  // Grow the table so that slot ID exists.
+  // May be null if nothing has been stored in this slot yet.
+  return TypeList[ID];
+}
+
+
 //===----------------------------------------------------------------------===//
 //  Functions for parsing blocks from the bitcode file
 //===----------------------------------------------------------------------===//
@@ -473,17 +480,22 @@ bool BitcodeReader::ParseAttributeBlock() {
   }
 }
 
-
 bool BitcodeReader::ParseTypeTable() {
-  if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID))
+  if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
     return Error("Malformed block record");
+  // The records inside the block are handled by ParseTypeTableBody.
+  return ParseTypeTableBody();
+}
 
+bool BitcodeReader::ParseTypeTableBody() {
   if (!TypeList.empty())
     return Error("Multiple TYPE_BLOCKs found!");
 
   SmallVector<uint64_t, 64> Record;
   unsigned NumRecords = 0;
 
+  SmallString<64> TypeName;
+  
   // Read all the records for this type table.
   while (1) {
     unsigned Code = Stream.ReadCode();
@@ -510,17 +522,15 @@ bool BitcodeReader::ParseTypeTable() {
 
     // Read a record.
     Record.clear();
-    const Type *ResultTy = 0;
+    Type *ResultTy = 0;
     switch (Stream.ReadRecord(Code, Record)) {
-    default:  // Default behavior: unknown type.
-      ResultTy = 0;
-      break;
+    default: return Error("unknown type in type table");
     case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
       // TYPE_CODE_NUMENTRY contains a count of the number of types in the
       // type list.  This allows us to reserve space.
       if (Record.size() < 1)
         return Error("Invalid TYPE_CODE_NUMENTRY record");
-      TypeList.reserve(Record[0]);
+      TypeList.resize(Record[0]);
       continue;
     case bitc::TYPE_CODE_VOID:      // VOID
       ResultTy = Type::getVoidTy(Context);
@@ -543,9 +553,6 @@ bool BitcodeReader::ParseTypeTable() {
     case bitc::TYPE_CODE_LABEL:     // LABEL
       ResultTy = Type::getLabelTy(Context);
       break;
-    case bitc::TYPE_CODE_OPAQUE:    // OPAQUE
-      ResultTy = 0;
-      break;
     case bitc::TYPE_CODE_METADATA:  // METADATA
       ResultTy = Type::getMetadataTy(Context);
       break;
@@ -565,8 +572,9 @@ bool BitcodeReader::ParseTypeTable() {
       unsigned AddressSpace = 0;
       if (Record.size() == 2)
         AddressSpace = Record[1];
-      ResultTy = PointerType::get(getTypeByID(Record[0], true),
-                                        AddressSpace);
+      ResultTy = getTypeByID(Record[0]);
+      if (ResultTy == 0) return Error("invalid element type in pointer type");
+      ResultTy = PointerType::get(ResultTy, AddressSpace);
       break;
     }
     case bitc::TYPE_CODE_FUNCTION: {
@@ -574,69 +582,306 @@ bool BitcodeReader::ParseTypeTable() {
       // FUNCTION: [vararg, attrid, retty, paramty x N]
       if (Record.size() < 3)
         return Error("Invalid FUNCTION type record");
-      std::vector<const Type*> ArgTys;
-      for (unsigned i = 3, e = Record.size(); i != e; ++i)
-        ArgTys.push_back(getTypeByID(Record[i], true));
+      std::vector<Type*> ArgTys;
+      for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+        if (Type *T = getTypeByID(Record[i]))
+          ArgTys.push_back(T);
+        else
+          break;
+      }
+      
+      ResultTy = getTypeByID(Record[2]);
+      if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
+        return Error("invalid type in function type");
 
-      ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys,
-                                   Record[0]);
+      ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
       break;
     }
-    case bitc::TYPE_CODE_STRUCT: {  // STRUCT: [ispacked, eltty x N]
+    case bitc::TYPE_CODE_STRUCT_ANON: {  // STRUCT: [ispacked, eltty x N]
       if (Record.size() < 1)
         return Error("Invalid STRUCT type record");
-      std::vector<const Type*> EltTys;
-      for (unsigned i = 1, e = Record.size(); i != e; ++i)
-        EltTys.push_back(getTypeByID(Record[i], true));
+      std::vector<Type*> EltTys;
+      for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+        if (Type *T = getTypeByID(Record[i]))
+          EltTys.push_back(T);
+        else
+          break;
+      }
+      if (EltTys.size() != Record.size()-1)
+        return Error("invalid type in struct type");
       ResultTy = StructType::get(Context, EltTys, Record[0]);
       break;
     }
+    case bitc::TYPE_CODE_STRUCT_NAME:   // STRUCT_NAME: [strchr x N]
+      if (ConvertToString(Record, 0, TypeName))
+        return Error("Invalid STRUCT_NAME record");
+      continue;
+
+    case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
+      if (Record.size() < 1)
+        return Error("Invalid STRUCT type record");
+      
+      if (NumRecords >= TypeList.size())
+        return Error("invalid TYPE table");
+      
+      // Check to see if this was forward referenced, if so fill in the temp.
+      StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+      if (Res) {
+        Res->setName(TypeName);
+        TypeList[NumRecords] = 0;
+      } else  // Otherwise, create a new struct.
+        Res = StructType::createNamed(Context, TypeName);
+      TypeName.clear();
+      
+      SmallVector<Type*, 8> EltTys;
+      for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+        if (Type *T = getTypeByID(Record[i]))
+          EltTys.push_back(T);
+        else
+          break;
+      }
+      if (EltTys.size() != Record.size()-1)
+        return Error("invalid STRUCT type record");
+      Res->setBody(EltTys, Record[0]);
+      ResultTy = Res;
+      break;
+    }
+    case bitc::TYPE_CODE_OPAQUE: {       // OPAQUE: []
+      if (Record.size() != 1)
+        return Error("Invalid OPAQUE type record");
+
+      if (NumRecords >= TypeList.size())
+        return Error("invalid TYPE table");
+      
+      // Check to see if this was forward referenced, if so fill in the temp.
+      StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+      if (Res) {
+        Res->setName(TypeName);
+        TypeList[NumRecords] = 0;
+      } else  // Otherwise, create a new struct with no body.
+        Res = StructType::createNamed(Context, TypeName);
+      TypeName.clear();
+      ResultTy = Res;
+      break;
+    }        
     case bitc::TYPE_CODE_ARRAY:     // ARRAY: [numelts, eltty]
       if (Record.size() < 2)
         return Error("Invalid ARRAY type record");
-      ResultTy = ArrayType::get(getTypeByID(Record[1], true), Record[0]);
+      if ((ResultTy = getTypeByID(Record[1])))
+        ResultTy = ArrayType::get(ResultTy, Record[0]);
+      else
+        return Error("Invalid ARRAY type element");
       break;
     case bitc::TYPE_CODE_VECTOR:    // VECTOR: [numelts, eltty]
       if (Record.size() < 2)
         return Error("Invalid VECTOR type record");
-      ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]);
+      if ((ResultTy = getTypeByID(Record[1])))
+        ResultTy = VectorType::get(ResultTy, Record[0]);
+      else  // Record[1] is not a valid type ID.
+        return Error("Invalid VECTOR type element");
       break;
     }
 
-    if (NumRecords == TypeList.size()) {
-      // If this is a new type slot, just append it.
-      TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get(Context));
-      ++NumRecords;
-    } else if (ResultTy == 0) {
-      // Otherwise, this was forward referenced, so an opaque type was created,
-      // but the result type is actually just an opaque.  Leave the one we
-      // created previously.
-      ++NumRecords;
-    } else {
-      // Otherwise, this was forward referenced, so an opaque type was created.
-      // Resolve the opaque type to the real type now.
-      assert(NumRecords < TypeList.size() && "Typelist imbalance");
-      const OpaqueType *OldTy = cast<OpaqueType>(TypeList[NumRecords++].get());
-
-      // Don't directly push the new type on the Tab. Instead we want to replace
-      // the opaque type we previously inserted with the new concrete value. The
-      // refinement from the abstract (opaque) type to the new type causes all
-      // uses of the abstract type to use the concrete type (NewTy). This will
-      // also cause the opaque type to be deleted.
-      const_cast<OpaqueType*>(OldTy)->refineAbstractTypeTo(ResultTy);
-
-      // This should have replaced the old opaque type with the new type in the
-      // value table... or with a preexisting type that was already in the
-      // system.  Let's just make sure it did.
-      assert(TypeList[NumRecords-1].get() != OldTy &&
-             "refineAbstractType didn't work!");
+    if (NumRecords >= TypeList.size())
+      return Error("invalid TYPE table");
+    assert(ResultTy && "Didn't read a type?");
+    assert(TypeList[NumRecords] == 0 && "Already read type?");
+    TypeList[NumRecords++] = ResultTy;
+  }
+}
+
+/// Parse an obsolete TYPE_BLOCK_ID_OLD type table.  FIXME: Remove in LLVM 3.1
+bool BitcodeReader::ParseOldTypeTable() {
+  if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
+    return Error("Malformed block record");
+
+  if (!TypeList.empty())
+    return Error("Multiple TYPE_BLOCKs found!");
+
+  // While horrible, we have no good ordering of types in the bc file.  Just
+  // iteratively parse types out of the bc file in multiple passes until we get
+  // them all; each pass resolves the types whose operands are already read.
+  // Do this by saving a cursor for the start of the type block.
+  BitstreamCursor StartOfTypeBlockCursor(Stream);
+
+  unsigned NumTypesRead = 0;  // Type slots filled in so far, over all passes.
+
+  SmallVector<uint64_t, 64> Record;
+RestartScan:
+  unsigned NextTypeID = 0;
+  bool ReadAnyTypes = false;
+
+  // Read all the records for this type table.
+  while (1) {
+    unsigned Code = Stream.ReadCode();
+    if (Code == bitc::END_BLOCK) {
+      if (NextTypeID != TypeList.size())
+        return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
+
+      // If we haven't read all of the types yet, iterate again.
+      if (NumTypesRead != TypeList.size()) {
+        // If we didn't successfully read any types in this pass, then we must
+        // have an unhandled forward reference.
+        if (!ReadAnyTypes)
+          return Error("Obsolete bitcode contains unhandled recursive type");
+        // Rewind to the start of the block and make another pass.
+        Stream = StartOfTypeBlockCursor;
+        goto RestartScan;
+      }
+
+      if (Stream.ReadBlockEnd())
+        return Error("Error at end of type table block");
+      return false;
+    }
+
+    if (Code == bitc::ENTER_SUBBLOCK) {
+      // No known subblocks, always skip them.
+      Stream.ReadSubBlockID();
+      if (Stream.SkipBlock())
+        return Error("Malformed block record");
+      continue;
+    }
+
+    if (Code == bitc::DEFINE_ABBREV) {
+      Stream.ReadAbbrevRecord();
+      continue;
     }
+
+    // Read a record.
+    Record.clear();
+    Type *ResultTy = 0;
+    switch (Stream.ReadRecord(Code, Record)) {
+    default: return Error("unknown type in type table");
+    case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+      // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+      // type list.  This allows us to size the type list up front.
+      if (Record.size() < 1)
+        return Error("Invalid TYPE_CODE_NUMENTRY record");
+      TypeList.resize(Record[0]);
+      continue;
+    case bitc::TYPE_CODE_VOID:      // VOID
+      ResultTy = Type::getVoidTy(Context);
+      break;
+    case bitc::TYPE_CODE_FLOAT:     // FLOAT
+      ResultTy = Type::getFloatTy(Context);
+      break;
+    case bitc::TYPE_CODE_DOUBLE:    // DOUBLE
+      ResultTy = Type::getDoubleTy(Context);
+      break;
+    case bitc::TYPE_CODE_X86_FP80:  // X86_FP80
+      ResultTy = Type::getX86_FP80Ty(Context);
+      break;
+    case bitc::TYPE_CODE_FP128:     // FP128
+      ResultTy = Type::getFP128Ty(Context);
+      break;
+    case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+      ResultTy = Type::getPPC_FP128Ty(Context);
+      break;
+    case bitc::TYPE_CODE_LABEL:     // LABEL
+      ResultTy = Type::getLabelTy(Context);
+      break;
+    case bitc::TYPE_CODE_METADATA:  // METADATA
+      ResultTy = Type::getMetadataTy(Context);
+      break;
+    case bitc::TYPE_CODE_X86_MMX:   // X86_MMX
+      ResultTy = Type::getX86_MMXTy(Context);
+      break;
+    case bitc::TYPE_CODE_INTEGER:   // INTEGER: [width]
+      if (Record.size() < 1)
+        return Error("Invalid Integer type record");
+      ResultTy = IntegerType::get(Context, Record[0]);
+      break;
+    case bitc::TYPE_CODE_OPAQUE:    // OPAQUE
+      if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
+        ResultTy = StructType::createNamed(Context, "");
+      break;
+    case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD: [ispacked, eltty x N]
+      // Record[0] (ispacked) is read below, so an empty record is malformed.
+      if (Record.size() < 1)
+        return Error("Invalid STRUCT type record");
+      if (NextTypeID >= TypeList.size()) break;
+      // If we already read it, don't reprocess.
+      if (TypeList[NextTypeID] &&
+          !cast<StructType>(TypeList[NextTypeID])->isOpaque())
+        break;
+      // Set a type (a placeholder struct whose body is filled in below).
+      if (TypeList[NextTypeID] == 0)
+        TypeList[NextTypeID] = StructType::createNamed(Context, "");
+
+      std::vector<Type*> EltTys;
+      for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+        if (Type *Elt = getTypeByIDOrNull(Record[i]))
+          EltTys.push_back(Elt);
+        else
+          break;
+      }
+      if (EltTys.size() != Record.size()-1)
+        break;      // Not all elements are ready.
+      cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
+      ResultTy = TypeList[NextTypeID];
+      TypeList[NextTypeID] = 0;
+      break;
+    }
+    case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+      //          [pointee type, address space]
+      if (Record.size() < 1)
+        return Error("Invalid POINTER type record");
+      unsigned AddressSpace = 0;
+      if (Record.size() == 2)
+        AddressSpace = Record[1];
+      if ((ResultTy = getTypeByIDOrNull(Record[0])))
+        ResultTy = PointerType::get(ResultTy, AddressSpace);
+      break;
+    }
+    case bitc::TYPE_CODE_FUNCTION: {
+      // FIXME: attrid is dead, remove it in LLVM 3.0
+      // FUNCTION: [vararg, attrid, retty, paramty x N]
+      if (Record.size() < 3)
+        return Error("Invalid FUNCTION type record");
+      std::vector<Type*> ArgTys;
+      for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+        if (Type *Elt = getTypeByIDOrNull(Record[i]))
+          ArgTys.push_back(Elt);
+        else
+          break;
+      }
+      if (ArgTys.size()+3 != Record.size())
+        break;  // Something was null.
+      if ((ResultTy = getTypeByIDOrNull(Record[2])))
+        ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+      break;
+    }
+    case bitc::TYPE_CODE_ARRAY:     // ARRAY: [numelts, eltty]
+      if (Record.size() < 2)
+        return Error("Invalid ARRAY type record");
+      if ((ResultTy = getTypeByIDOrNull(Record[1])))
+        ResultTy = ArrayType::get(ResultTy, Record[0]);
+      break;
+    case bitc::TYPE_CODE_VECTOR:    // VECTOR: [numelts, eltty]
+      if (Record.size() < 2)
+        return Error("Invalid VECTOR type record");
+      if ((ResultTy = getTypeByIDOrNull(Record[1])))
+        ResultTy = VectorType::get(ResultTy, Record[0]);
+      break;
+    }
+
+    if (NextTypeID >= TypeList.size())
+      return Error("invalid TYPE table");
+    // Record the type only if it was resolved and the slot is still empty.
+    if (ResultTy && TypeList[NextTypeID] == 0) {
+      ++NumTypesRead;
+      ReadAnyTypes = true;
+
+      TypeList[NextTypeID] = ResultTy;
+    }
+
+    ++NextTypeID;
   }
 }
 
 
-bool BitcodeReader::ParseTypeSymbolTable() {
-  if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID))
+bool BitcodeReader::ParseOldTypeSymbolTable() {
+  if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
     return Error("Malformed block record");
 
   SmallVector<uint64_t, 64> Record;
@@ -676,7 +921,10 @@ bool BitcodeReader::ParseTypeSymbolTable() {
       if (TypeID >= TypeList.size())
         return Error("Invalid Type ID in TST_ENTRY record");
 
-      TheModule->addTypeName(TypeName, TypeList[TypeID].get());
+      // Only apply the type name to a struct type with no name.
+      if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
+        if (!STy->isAnonymous() && !STy->hasName())
+          STy->setName(TypeName);
       TypeName.clear();
       break;
     }
@@ -790,13 +1038,9 @@ bool BitcodeReader::ParseMetadata() {
       Record.clear();
       Code = Stream.ReadCode();
 
-      // METADATA_NAME is always followed by METADATA_NAMED_NODE2.
-      // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0.
+      // METADATA_NAME is always followed by METADATA_NAMED_NODE.
       unsigned NextBitCode = Stream.ReadRecord(Code, Record);
-      if (NextBitCode == bitc::METADATA_NAMED_NODE) {
-        LLVM2_7MetadataDetected = true;
-      } else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
-        assert ( 0 && "Invalid Named Metadata record");
+      assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
 
       // Read named metadata elements.
       unsigned Size = Record.size();
@@ -807,35 +1051,20 @@ bool BitcodeReader::ParseMetadata() {
           return Error("Malformed metadata record");
         NMD->addOperand(MD);
       }
-      // Backwards compatibility hack: NamedMDValues used to be Values,
-      // and they got their own slots in the value numbering. They are no
-      // longer Values, however we still need to account for them in the
-      // numbering in order to be able to read old bitcode files.
-      // FIXME: Remove this in LLVM 3.0.
-      if (LLVM2_7MetadataDetected)
-        MDValueList.AssignValue(0, NextMDValueNo++);
       break;
     }
-    case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0.
-    case bitc::METADATA_FN_NODE2:
+    case bitc::METADATA_FN_NODE:
       IsFunctionLocal = true;
       // fall-through
-    case bitc::METADATA_NODE:    // FIXME: Remove in LLVM 3.0.
-    case bitc::METADATA_NODE2: {
-
-      // Detect 2.7-era metadata.
-      // FIXME: Remove in LLVM 3.0.
-      if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE)
-        LLVM2_7MetadataDetected = true;
-
+    case bitc::METADATA_NODE: {
       if (Record.size() % 2 == 1)
-        return Error("Invalid METADATA_NODE2 record");
+        return Error("Invalid METADATA_NODE record");
 
       unsigned Size = Record.size();
       SmallVector<Value*, 8> Elts;
       for (unsigned i = 0; i != Size; i += 2) {
         const Type *Ty = getTypeByID(Record[i]);
-        if (!Ty) return Error("Invalid METADATA_NODE2 record");
+        if (!Ty) return Error("Invalid METADATA_NODE record");
         if (Ty->isMetadataTy())
           Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
         else if (!Ty->isVoidTy())
@@ -1331,12 +1560,16 @@ bool BitcodeReader::ParseModule() {
         if (ParseAttributeBlock())
           return true;
         break;
-      case bitc::TYPE_BLOCK_ID:
+      case bitc::TYPE_BLOCK_ID_NEW:
         if (ParseTypeTable())
           return true;
         break;
-      case bitc::TYPE_SYMTAB_BLOCK_ID:
-        if (ParseTypeSymbolTable())
+      case bitc::TYPE_BLOCK_ID_OLD:
+        if (ParseOldTypeTable())
+          return true;
+        break;
+      case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
+        if (ParseOldTypeSymbolTable())
           return true;
         break;
       case bitc::VALUE_SYMTAB_BLOCK_ID:
@@ -1755,10 +1988,7 @@ bool BitcodeReader::ParseMetadataAttachment() {
     switch (Stream.ReadRecord(Code, Record)) {
     default:  // Default behavior: ignore.
       break;
-    // FIXME: Remove in LLVM 3.0.
-    case bitc::METADATA_ATTACHMENT:
-      LLVM2_7MetadataDetected = true;
-    case bitc::METADATA_ATTACHMENT2: {
+    case bitc::METADATA_ATTACHMENT: {
       unsigned RecordLength = Record.size();
       if (Record.empty() || (RecordLength - 1) % 2 == 1)
         return Error ("Invalid METADATA_ATTACHMENT reader!");
@@ -1870,10 +2100,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       I = 0;
       continue;
         
-    // FIXME: Remove this in LLVM 3.0.
-    case bitc::FUNC_CODE_DEBUG_LOC:
-      LLVM2_7MetadataDetected = true;
-    case bitc::FUNC_CODE_DEBUG_LOC2: {      // DEBUG_LOC: [line, col, scope, ia]
+    case bitc::FUNC_CODE_DEBUG_LOC: {      // DEBUG_LOC: [line, col, scope, ia]
       I = 0;     // Get the last instruction emitted.
       if (CurBB && !CurBB->empty())
         I = &CurBB->back();
@@ -1979,8 +2206,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
         EXTRACTVALIdx.push_back((unsigned)Index);
       }
 
-      I = ExtractValueInst::Create(Agg,
-                                   EXTRACTVALIdx.begin(), EXTRACTVALIdx.end());
+      I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
       InstructionList.push_back(I);
       break;
     }
@@ -2004,8 +2230,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
         INSERTVALIdx.push_back((unsigned)Index);
       }
 
-      I = InsertValueInst::Create(Agg, Val,
-                                  INSERTVALIdx.begin(), INSERTVALIdx.end());
+      I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
       InstructionList.push_back(I);
       break;
     }
@@ -2112,18 +2337,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       break;
     }
 
-    case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n]
-      if (Record.size() != 2)
-        return Error("Invalid GETRESULT record");
-      unsigned OpNum = 0;
-      Value *Op;
-      getValueTypePair(Record, OpNum, NextValueNo, Op);
-      unsigned Index = Record[1];
-      I = ExtractValueInst::Create(Op, Index);
-      InstructionList.push_back(I);
-      break;
-    }
-
     case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
       {
         unsigned Size = Record.size();
@@ -2134,33 +2347,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
         }
 
         unsigned OpNum = 0;
-        SmallVector<Value *,4> Vs;
-        do {
-          Value *Op = NULL;
-          if (getValueTypePair(Record, OpNum, NextValueNo, Op))
-            return Error("Invalid RET record");
-          Vs.push_back(Op);
-        } while(OpNum != Record.size());
-
-        const Type *ReturnType = F->getReturnType();
-        // Handle multiple return values. FIXME: Remove in LLVM 3.0.
-        if (Vs.size() > 1 ||
-            (ReturnType->isStructTy() &&
-             (Vs.empty() || Vs[0]->getType() != ReturnType))) {
-          Value *RV = UndefValue::get(ReturnType);
-          for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
-            I = InsertValueInst::Create(RV, Vs[i], i, "mrv");
-            InstructionList.push_back(I);
-            CurBB->getInstList().push_back(I);
-            ValueList.AssignValue(I, NextValueNo++);
-            RV = I;
-          }
-          I = ReturnInst::Create(Context, RV);
-          InstructionList.push_back(I);
-          break;
-        }
+        Value *Op = NULL;
+        if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+          return Error("Invalid RET record");
+        if (OpNum != Record.size())
+          return Error("Invalid RET record");
 
-        I = ReturnInst::Create(Context, Vs[0]);
+        I = ReturnInst::Create(Context, Op);
         InstructionList.push_back(I);
         break;
       }
@@ -2272,8 +2465,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
         }
       }
 
-      I = InvokeInst::Create(Callee, NormalBB, UnwindBB,
-                             Ops.begin(), Ops.end());
+      I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
       InstructionList.push_back(I);
       cast<InvokeInst>(I)->setCallingConv(
         static_cast<CallingConv::ID>(CCInfo));
@@ -2307,47 +2499,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       break;
     }
 
-    case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align]
-      // Autoupgrade malloc instruction to malloc call.
-      // FIXME: Remove in LLVM 3.0.
-      if (Record.size() < 3)
-        return Error("Invalid MALLOC record");
-      const PointerType *Ty =
-        dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
-      Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context));
-      if (!Ty || !Size) return Error("Invalid MALLOC record");
-      if (!CurBB) return Error("Invalid malloc instruction with no BB");
-      const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext());
-      Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType());
-      AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty);
-      I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(),
-                                 AllocSize, Size, NULL);
-      InstructionList.push_back(I);
-      break;
-    }
-    case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty]
-      unsigned OpNum = 0;
-      Value *Op;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
-          OpNum != Record.size())
-        return Error("Invalid FREE record");
-      if (!CurBB) return Error("Invalid free instruction with no BB");
-      I = CallInst::CreateFree(Op, CurBB);
-      InstructionList.push_back(I);
-      break;
-    }
     case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
-      // For backward compatibility, tolerate a lack of an opty, and use i32.
-      // Remove this in LLVM 3.0.
-      if (Record.size() < 3 || Record.size() > 4)
+      if (Record.size() != 4)
         return Error("Invalid ALLOCA record");
-      unsigned OpNum = 0;
       const PointerType *Ty =
-        dyn_cast_or_null<PointerType>(getTypeByID(Record[OpNum++]));
-      const Type *OpTy = Record.size() == 4 ? getTypeByID(Record[OpNum++]) :
-                                              Type::getInt32Ty(Context);
-      Value *Size = getFnValueByID(Record[OpNum++], OpTy);
-      unsigned Align = Record[OpNum++];
+        dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+      const Type *OpTy = getTypeByID(Record[1]);
+      Value *Size = getFnValueByID(Record[2], OpTy);
+      unsigned Align = Record[3];
       if (!Ty || !Size) return Error("Invalid ALLOCA record");
       I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
       InstructionList.push_back(I);
@@ -2364,7 +2523,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       InstructionList.push_back(I);
       break;
     }
-    case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol]
+    case bitc::FUNC_CODE_INST_STORE: { // STORE: [ptrty, ptr, val, align, vol]
       unsigned OpNum = 0;
       Value *Val, *Ptr;
       if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -2377,24 +2536,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       InstructionList.push_back(I);
       break;
     }
-    case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol]
-      // FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0.
-      unsigned OpNum = 0;
-      Value *Val, *Ptr;
-      if (getValueTypePair(Record, OpNum, NextValueNo, Val) ||
-          getValue(Record, OpNum,
-                   PointerType::getUnqual(Val->getType()), Ptr)||
-          OpNum+2 != Record.size())
-        return Error("Invalid STORE record");
-
-      I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
-      InstructionList.push_back(I);
-      break;
-    }
-    // FIXME: Remove this in LLVM 3.0.
-    case bitc::FUNC_CODE_INST_CALL:
-      LLVM2_7MetadataDetected = true;
-    case bitc::FUNC_CODE_INST_CALL2: {
+    case bitc::FUNC_CODE_INST_CALL: {
       // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
       if (Record.size() < 3)
         return Error("Invalid CALL record");
@@ -2416,7 +2558,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       SmallVector<Value*, 16> Args;
       // Read the fixed params.
       for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
-        if (FTy->getParamType(i)->getTypeID()==Type::LabelTyID)
+        if (FTy->getParamType(i)->isLabelTy())
           Args.push_back(getBasicBlock(Record[OpNum]));
         else
           Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
@@ -2436,7 +2578,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
         }
       }
 
-      I = CallInst::Create(Callee, Args.begin(), Args.end());
+      I = CallInst::Create(Callee, Args);
       InstructionList.push_back(I);
       cast<CallInst>(I)->setCallingConv(
         static_cast<CallingConv::ID>(CCInfo>>1));
@@ -2513,23 +2655,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
     BlockAddrFwdRefs.erase(BAFRI);
   }
   
-  // FIXME: Remove this in LLVM 3.0.
-  unsigned NewMDValueListSize = MDValueList.size();
-
   // Trim the value list down to the size it was before we parsed this function.
   ValueList.shrinkTo(ModuleValueListSize);
   MDValueList.shrinkTo(ModuleMDValueListSize);
-
-  // Backwards compatibility hack: Function-local metadata numbers
-  // were previously not reset between functions. This is now fixed,
-  // however we still need to understand the old numbering in order
-  // to be able to read old bitcode files.
-  // FIXME: Remove this in LLVM 3.0.
-  if (LLVM2_7MetadataDetected)
-    MDValueList.resize(NewMDValueListSize);
-
   std::vector<BasicBlock*>().swap(FunctionBBs);
-
   return false;
 }
 
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index f8fc079c73d9..1b3bf1a1854a 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -44,9 +44,9 @@ class BitcodeReaderValueList {
   /// number that holds the resolved value.
   typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
   ResolveConstantsTy ResolveConstants;
-  LLVMContext& Context;
+  LLVMContext &Context;
 public:
-  BitcodeReaderValueList(LLVMContext& C) : Context(C) {}
+  BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
   ~BitcodeReaderValueList() {
     assert(ResolveConstants.empty() && "Constants not resolved?");
   }
@@ -131,7 +131,7 @@ class BitcodeReader : public GVMaterializer {
   
   const char *ErrorString;
   
-  std::vector<PATypeHolder> TypeList;
+  std::vector<Type*> TypeList;
   BitcodeReaderValueList ValueList;
   BitcodeReaderMDValueList MDValueList;
   SmallVector<Instruction *, 64> InstructionList;
@@ -174,17 +174,10 @@ class BitcodeReader : public GVMaterializer {
   typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
   DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
 
-  /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or
-  /// earlier was detected, in which case we behave slightly differently,
-  /// for compatibility.
-  /// FIXME: Remove in LLVM 3.0.
-  bool LLVM2_7MetadataDetected;
-  
 public:
   explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
     : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
-      ErrorString(0), ValueList(C), MDValueList(C),
-      LLVM2_7MetadataDetected(false) {
+      ErrorString(0), ValueList(C), MDValueList(C) {
     HasReversedFunctionsWithBodies = false;
   }
   ~BitcodeReader() {
@@ -217,12 +210,12 @@ public:
   /// @returns true if an error occurred.
   bool ParseTriple(std::string &Triple);
 private:
-  const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
+  Type *getTypeByID(unsigned ID);
+  Type *getTypeByIDOrNull(unsigned ID);
   Value *getFnValueByID(unsigned ID, const Type *Ty) {
-    if (Ty == Type::getMetadataTy(Context))
+    if (Ty && Ty->isMetadataTy())
       return MDValueList.getValueFwdRef(ID);
-    else
-      return ValueList.getValueFwdRef(ID, Ty);
+    return ValueList.getValueFwdRef(ID, Ty);
   }
   BasicBlock *getBasicBlock(unsigned ID) const {
     if (ID >= FunctionBBs.size()) return 0; // Invalid ID
@@ -266,7 +259,10 @@ private:
   bool ParseModule();
   bool ParseAttributeBlock();
   bool ParseTypeTable();
-  bool ParseTypeSymbolTable();
+  bool ParseOldTypeTable();         // FIXME: Remove in LLVM 3.1
+  bool ParseTypeTableBody();
+
+  bool ParseOldTypeSymbolTable();   // FIXME: Remove in LLVM 3.1
   bool ParseValueSymbolTable();
   bool ParseConstants();
   bool RememberAndSkipFunctionBody();
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6972a451606a..85d67ce62b9f 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -21,13 +21,14 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Program.h"
 #include <cctype>
+#include <map>
 using namespace llvm;
 
 /// These are manifest constants used by the bitcode writer. They do not need to
@@ -100,13 +101,16 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
   }
 }
 
-static void WriteStringRecord(unsigned Code, const std::string &Str,
+static void WriteStringRecord(unsigned Code, StringRef Str,
                               unsigned AbbrevToUse, BitstreamWriter &Stream) {
   SmallVector<unsigned, 64> Vals;
 
   // Code: [strchar x N]
-  for (unsigned i = 0, e = Str.size(); i != e; ++i)
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+      AbbrevToUse = 0;
     Vals.push_back(Str[i]);
+  }
 
   // Emit the finished record.
   Stream.EmitRecord(Code, Vals, AbbrevToUse);
@@ -150,7 +154,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE,
 static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
   const ValueEnumerator::TypeList &TypeList = VE.getTypes();
 
-  Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */);
+  Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
   SmallVector<uint64_t, 64> TypeVals;
 
   // Abbrev for TYPE_CODE_POINTER.
@@ -171,15 +175,32 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
                             Log2_32_Ceil(VE.getTypes().size()+1)));
   unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
 
-  // Abbrev for TYPE_CODE_STRUCT.
+  // Abbrev for TYPE_CODE_STRUCT_ANON.
   Abbv = new BitCodeAbbrev();
-  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT));
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // ispacked
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
                             Log2_32_Ceil(VE.getTypes().size()+1)));
-  unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+  unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_STRUCT_NAME.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+  unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
 
+  // Abbrev for TYPE_CODE_STRUCT_NAMED.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // ispacked
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+
+  
   // Abbrev for TYPE_CODE_ARRAY.
   Abbv = new BitCodeAbbrev();
   Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -201,16 +222,15 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
 
     switch (T->getTypeID()) {
     default: llvm_unreachable("Unknown type!");
-    case Type::VoidTyID:   Code = bitc::TYPE_CODE_VOID;   break;
-    case Type::FloatTyID:  Code = bitc::TYPE_CODE_FLOAT;  break;
-    case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
-    case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
-    case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+    case Type::VoidTyID:      Code = bitc::TYPE_CODE_VOID;   break;
+    case Type::FloatTyID:     Code = bitc::TYPE_CODE_FLOAT;  break;
+    case Type::DoubleTyID:    Code = bitc::TYPE_CODE_DOUBLE; break;
+    case Type::X86_FP80TyID:  Code = bitc::TYPE_CODE_X86_FP80; break;
+    case Type::FP128TyID:     Code = bitc::TYPE_CODE_FP128; break;
     case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
-    case Type::LabelTyID:  Code = bitc::TYPE_CODE_LABEL;  break;
-    case Type::OpaqueTyID: Code = bitc::TYPE_CODE_OPAQUE; break;
-    case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
-    case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+    case Type::LabelTyID:     Code = bitc::TYPE_CODE_LABEL;  break;
+    case Type::MetadataTyID:  Code = bitc::TYPE_CODE_METADATA; break;
+    case Type::X86_MMXTyID:   Code = bitc::TYPE_CODE_X86_MMX; break;
     case Type::IntegerTyID:
       // INTEGER: [width]
       Code = bitc::TYPE_CODE_INTEGER;
@@ -241,13 +261,28 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
     case Type::StructTyID: {
       const StructType *ST = cast<StructType>(T);
       // STRUCT: [ispacked, eltty x N]
-      Code = bitc::TYPE_CODE_STRUCT;
       TypeVals.push_back(ST->isPacked());
       // Output all of the element types.
       for (StructType::element_iterator I = ST->element_begin(),
            E = ST->element_end(); I != E; ++I)
         TypeVals.push_back(VE.getTypeID(*I));
-      AbbrevToUse = StructAbbrev;
+      
+      if (ST->isAnonymous()) {
+        Code = bitc::TYPE_CODE_STRUCT_ANON;
+        AbbrevToUse = StructAnonAbbrev;
+      } else {
+        if (ST->isOpaque()) {
+          Code = bitc::TYPE_CODE_OPAQUE;
+        } else {
+          Code = bitc::TYPE_CODE_STRUCT_NAMED;
+          AbbrevToUse = StructNamedAbbrev;
+        }
+
+        // Emit the name if it is present.
+        if (!ST->getName().empty())
+          WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+                            StructNameAbbrev, Stream);
+      }
       break;
     }
     case Type::ArrayTyID: {
@@ -489,8 +524,8 @@ static void WriteMDNode(const MDNode *N,
       Record.push_back(0);
     }
   }
-  unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 :
-                                           bitc::METADATA_NODE2;
+  unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+                                           bitc::METADATA_NODE;
   Stream.EmitRecord(MDCode, Record, 0);
   Record.clear();
 }
@@ -553,7 +588,7 @@ static void WriteModuleMetadata(const Module *M,
     // Write named metadata operands.
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
       Record.push_back(VE.getValueID(NMD->getOperand(i)));
-    Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0);
+    Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
     Record.clear();
   }
 
@@ -589,7 +624,7 @@ static void WriteMetadataAttachment(const Function &F,
   SmallVector<uint64_t, 64> Record;
 
   // Write metadata attachments
-  // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]]
+  // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
   SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
   
   for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -607,7 +642,7 @@ static void WriteMetadataAttachment(const Function &F,
         Record.push_back(MDs[i].first);
         Record.push_back(VE.getValueID(MDs[i].second));
       }
-      Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0);
+      Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
       Record.clear();
     }
 
@@ -1078,12 +1113,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
     break;
 
-  case Instruction::PHI:
+  case Instruction::PHI: {
+    const PHINode &PN = cast<PHINode>(I);
     Code = bitc::FUNC_CODE_INST_PHI;
-    Vals.push_back(VE.getTypeID(I.getType()));
-    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
-      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    Vals.push_back(VE.getTypeID(PN.getType()));
+    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+      Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
+      Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+    }
     break;
+  }
 
   case Instruction::Alloca:
     Code = bitc::FUNC_CODE_INST_ALLOCA;
@@ -1102,7 +1141,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     Vals.push_back(cast<LoadInst>(I).isVolatile());
     break;
   case Instruction::Store:
-    Code = bitc::FUNC_CODE_INST_STORE2;
+    Code = bitc::FUNC_CODE_INST_STORE;
     PushValueAndType(I.getOperand(1), InstID, Vals, VE);  // ptrty + ptr
     Vals.push_back(VE.getValueID(I.getOperand(0)));       // val.
     Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
@@ -1113,7 +1152,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
     const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
     const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
 
-    Code = bitc::FUNC_CODE_INST_CALL2;
+    Code = bitc::FUNC_CODE_INST_CALL;
 
     Vals.push_back(VE.getAttributeID(CI.getAttributes()));
     Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
@@ -1257,7 +1296,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
         Vals.push_back(DL.getCol());
         Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
         Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
-        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals);
+        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
         Vals.clear();
         
         LastDL = DL;
@@ -1273,46 +1312,6 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
   Stream.ExitBlock();
 }
 
-/// WriteTypeSymbolTable - Emit a block for the specified type symtab.
-static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
-                                 const ValueEnumerator &VE,
-                                 BitstreamWriter &Stream) {
-  if (TST.empty()) return;
-
-  Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3);
-
-  // 7-bit fixed width VST_CODE_ENTRY strings.
-  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
-  Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
-                            Log2_32_Ceil(VE.getTypes().size()+1)));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
-  unsigned V7Abbrev = Stream.EmitAbbrev(Abbv);
-
-  SmallVector<unsigned, 64> NameVals;
-
-  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
-       TI != TE; ++TI) {
-    // TST_ENTRY: [typeid, namechar x N]
-    NameVals.push_back(VE.getTypeID(TI->second));
-
-    const std::string &Str = TI->first;
-    bool is7Bit = true;
-    for (unsigned i = 0, e = Str.size(); i != e; ++i) {
-      NameVals.push_back((unsigned char)Str[i]);
-      if (Str[i] & 128)
-        is7Bit = false;
-    }
-
-    // Emit the finished record.
-    Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0);
-    NameVals.clear();
-  }
-
-  Stream.ExitBlock();
-}
-
 // Emit blockinfo, which defines the standard abbreviations etc.
 static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
   // We only want to emit block info records for blocks that have multiple
@@ -1516,9 +1515,6 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
   // Emit metadata.
   WriteModuleMetadataStore(M, Stream);
 
-  // Emit the type symbol table information.
-  WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream);
-
   // Emit names for globals/functions etc.
   WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
 
@@ -1543,40 +1539,7 @@ enum {
   DarwinBCHeaderSize = 5*4
 };
 
-/// isARMTriplet - Return true if the triplet looks like:
-/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*.
-static bool isARMTriplet(const std::string &TT) {
-  size_t Pos = 0;
-  size_t Size = TT.size();
-  if (Size >= 6 &&
-      TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' &&
-      TT[3] == 'm' && TT[4] == 'b')
-    Pos = 5;
-  else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm')
-    Pos = 3;
-  else
-    return false;
-
-  if (TT[Pos] == '-')
-    return true;
-  else if (TT[Pos] == 'v') {
-    if (Size >= Pos+4 &&
-        TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2')
-      return true;
-    else if (Size >= Pos+4 &&
-             TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e')
-      return true;
-  } else
-    return false;
-  while (++Pos < Size && TT[Pos] != '-') {
-    if (!isdigit(TT[Pos]))
-      return false;
-  }
-  return true;
-}
-
-static void EmitDarwinBCHeader(BitstreamWriter &Stream,
-                               const std::string &TT) {
+static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
   unsigned CPUType = ~0U;
 
   // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
@@ -1590,16 +1553,16 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream,
     DARWIN_CPU_TYPE_POWERPC    = 18
   };
 
-  if (TT.find("x86_64-") == 0)
+  Triple::ArchType Arch = TT.getArch();
+  if (Arch == Triple::x86_64)
     CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
-  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
-           TT[4] == '-' && TT[1] - '3' < 6)
+  else if (Arch == Triple::x86)
     CPUType = DARWIN_CPU_TYPE_X86;
-  else if (TT.find("powerpc-") == 0)
+  else if (Arch == Triple::ppc)
     CPUType = DARWIN_CPU_TYPE_POWERPC;
-  else if (TT.find("powerpc64-") == 0)
+  else if (Arch == Triple::ppc64)
     CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
-  else if (isARMTriplet(TT))
+  else if (Arch == Triple::arm || Arch == Triple::thumb)
     CPUType = DARWIN_CPU_TYPE_ARM;
 
   // Traditional Bitcode starts after header.
@@ -1645,11 +1608,9 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
 void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
   // If this is darwin or another generic macho target, emit a file header and
   // trailer if needed.
-  bool isMacho =
-    M->getTargetTriple().find("-darwin") != std::string::npos ||
-    M->getTargetTriple().find("-macho") != std::string::npos;
-  if (isMacho)
-    EmitDarwinBCHeader(Stream, M->getTargetTriple());
+  Triple TT(M->getTargetTriple());
+  if (TT.isOSDarwin())
+    EmitDarwinBCHeader(Stream, TT);
 
   // Emit the file header.
   Stream.Emit((unsigned)'B', 8);
@@ -1662,6 +1623,6 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
   // Emit the module.
   WriteModule(M, Stream);
 
-  if (isMacho)
+  if (TT.isOSDarwin())
     EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
 }
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 5138c3c984f3..b68bf92d51b2 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -17,7 +17,6 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/Instructions.h"
 #include <algorithm>
@@ -59,9 +58,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
        I != E; ++I)
     EnumerateValue(I->getAliasee());
 
-  // Enumerate types used by the type symbol table.
-  EnumerateTypeSymbolTable(M->getTypeSymbolTable());
-
   // Insert constants and metadata that are named at module level into the slot 
   // pool so that the module symbol table can refer to them...
   EnumerateValueSymbolTable(M->getValueSymbolTable());
@@ -109,78 +105,12 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
 
   // Optimize constant ordering.
   OptimizeConstants(FirstConstant, Values.size());
-
-  OptimizeTypes();
-
-  // Now that we rearranged the type table, rebuild TypeMap.
-  for (unsigned i = 0, e = Types.size(); i != e; ++i)
-    TypeMap[Types[i]] = i+1;
-}
-
-struct TypeAndDeps {
-  const Type *Ty;
-  unsigned NumDeps;
-};
-
-static int CompareByDeps(const void *a, const void *b) {
-  const TypeAndDeps &ta = *(const TypeAndDeps*) a;
-  const TypeAndDeps &tb = *(const TypeAndDeps*) b;
-  return ta.NumDeps - tb.NumDeps;
-}
-
-static void VisitType(const Type *Ty, SmallPtrSet<const Type*, 16> &Visited,
-                      std::vector<const Type*> &Out) {
-  if (Visited.count(Ty))
-    return;
-
-  Visited.insert(Ty);
-
-  for (Type::subtype_iterator I2 = Ty->subtype_begin(),
-         E2 = Ty->subtype_end(); I2 != E2; ++I2) {
-    const Type *InnerType = I2->get();
-    VisitType(InnerType, Visited, Out);
-  }
-
-  Out.push_back(Ty);
 }
 
-void ValueEnumerator::OptimizeTypes(void) {
-  // If the types form a DAG, this will compute a topological sort and
-  // no forward references will be needed when reading them in.
-  // If there are cycles, this is a simple but reasonable heuristic for
-  // the minimum feedback arc set problem.
-  const unsigned NumTypes = Types.size();
-  std::vector<TypeAndDeps> TypeDeps;
-  TypeDeps.resize(NumTypes);
-
-  for (unsigned I = 0; I < NumTypes; ++I) {
-    const Type *Ty = Types[I];
-    TypeDeps[I].Ty = Ty;
-    TypeDeps[I].NumDeps = 0;
-  }
-
-  for (unsigned I = 0; I < NumTypes; ++I) {
-    const Type *Ty = TypeDeps[I].Ty;
-    for (Type::subtype_iterator I2 = Ty->subtype_begin(),
-           E2 = Ty->subtype_end(); I2 != E2; ++I2) {
-      const Type *InnerType = I2->get();
-      unsigned InnerIndex = TypeMap.lookup(InnerType) - 1;
-      TypeDeps[InnerIndex].NumDeps++;
-    }
-  }
-  array_pod_sort(TypeDeps.begin(), TypeDeps.end(), CompareByDeps);
-
-  SmallPtrSet<const Type*, 16> Visited;
-  Types.clear();
-  Types.reserve(NumTypes);
-  for (unsigned I = 0; I < NumTypes; ++I) {
-    VisitType(TypeDeps[I].Ty, Visited, Types);
-  }
-}
 
 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
   InstructionMapType::const_iterator I = InstructionMap.find(Inst);
-  assert (I != InstructionMap.end() && "Instruction is not mapped!");
+  assert(I != InstructionMap.end() && "Instruction is not mapped!");
   return I->second;
 }
 
@@ -235,14 +165,6 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
 }
 
 
-/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol
-/// table.
-void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
-  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
-       TI != TE; ++TI)
-    EnumerateType(TI->second);
-}
-
 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
 /// table into the values table.
 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
@@ -394,20 +316,40 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
 
 
 void ValueEnumerator::EnumerateType(const Type *Ty) {
-  unsigned &TypeID = TypeMap[Ty];
+  unsigned *TypeID = &TypeMap[Ty];
 
   // We've already seen this type.
-  if (TypeID)
+  if (*TypeID)
     return;
 
-  // First time we saw this type, add it.
-  Types.push_back(Ty);
-  TypeID = Types.size();
-
-  // Enumerate subtypes.
+  // If it is a non-anonymous struct, mark the type as being visited so that we
+  // don't recursively visit it.  This is safe because we allow forward
+  // references of these in the bitcode reader.
+  if (const StructType *STy = dyn_cast<StructType>(Ty))
+    if (!STy->isAnonymous())
+      *TypeID = ~0U;
+  
+  // Enumerate all of the subtypes before we enumerate this type.  This ensures
+  // that the type will be enumerated in an order that can be directly built.
   for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
        I != E; ++I)
     EnumerateType(*I);
+  
+  // Refresh the TypeID pointer in case the table rehashed.
+  TypeID = &TypeMap[Ty];
+  
+  // Check to see if the type got an ID while we enumerated its subtypes.  This
+  // can happen with recursive types that hit the base case deeper than they
+  // start.  If the ID is still the ~0U placeholder, this is a struct that we
+  // are treating as forward ref'able: fall through and emit its definition now
+  // that all of its contents are available.
+  if (*TypeID && *TypeID != ~0U)
+    return;
+  
+  // Add this type now that its contents are all happily enumerated.
+  Types.push_back(Ty);
+  
+  *TypeID = Types.size();
 }
 
 // Enumerate the types for the specified value.  If the value is a constant,
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 1e42a2667669..6617b60deb26 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -30,7 +30,6 @@ class Module;
 class MDNode;
 class NamedMDNode;
 class AttrListPtr;
-class TypeSymbolTable;
 class ValueSymbolTable;
 class MDSymbolTable;
 
@@ -135,7 +134,6 @@ public:
 
 private:
   void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
-  void OptimizeTypes();
     
   void EnumerateMDNodeOperands(const MDNode *N);
   void EnumerateMetadata(const Value *MD);
@@ -146,7 +144,6 @@ private:
   void EnumerateOperandType(const Value *V);
   void EnumerateAttributes(const AttrListPtr &PAL);
   
-  void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
   void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
   void EnumerateNamedMetadata(const Module *M);
 };
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index dca1d29665ac..25842a7876a2 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,6 +16,7 @@
 
 #define DEBUG_TYPE "post-RA-sched"
 #include "AggressiveAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -114,12 +115,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
 
 AggressiveAntiDepBreaker::
 AggressiveAntiDepBreaker(MachineFunction& MFi,
-                         TargetSubtarget::RegClassVector& CriticalPathRCs) :
+                         const RegisterClassInfo &RCI,
+                         TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
   AntiDepBreaker(), MF(MFi),
   MRI(MF.getRegInfo()),
   TII(MF.getTarget().getInstrInfo()),
   TRI(MF.getTarget().getRegisterInfo()),
-  AllocatableSet(TRI->getAllocatableSet(MF)),
+  RegClassInfo(RCI),
   State(NULL) {
   /* Collect a bitset of all registers that are only broken if they
      are on the critical path. */
@@ -402,7 +404,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
     // Note register reference...
     const TargetRegisterClass *RC = NULL;
     if (i < MI->getDesc().getNumOperands())
-      RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+      RC = TII->getRegClass(MI->getDesc(), i, TRI);
     AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
     RegRefs.insert(std::make_pair(Reg, RR));
   }
@@ -477,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
     // Note register reference...
     const TargetRegisterClass *RC = NULL;
     if (i < MI->getDesc().getNumOperands())
-      RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+      RC = TII->getRegClass(MI->getDesc(), i, TRI);
     AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
     RegRefs.insert(std::make_pair(Reg, RR));
   }
@@ -618,9 +620,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   const TargetRegisterClass *SuperRC =
     TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
 
-  const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
-  const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
-  if (RB == RE) {
+  ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+  if (Order.empty()) {
     DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
     return false;
   }
@@ -628,17 +629,17 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   DEBUG(dbgs() << "\tFind Registers:");
 
   if (RenameOrder.count(SuperRC) == 0)
-    RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
+    RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
 
-  const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
-  const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
-  TargetRegisterClass::iterator R = OrigR;
+  unsigned OrigR = RenameOrder[SuperRC];
+  unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+  unsigned R = OrigR;
   do {
-    if (R == RB) R = RE;
+    if (R == 0) R = Order.size();
     --R;
-    const unsigned NewSuperReg = *R;
+    const unsigned NewSuperReg = Order[R];
     // Don't consider non-allocatable registers
-    if (!AllocatableSet.test(NewSuperReg)) continue;
+    if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
     // Don't replace a register with itself.
     if (NewSuperReg == SuperReg) continue;
 
@@ -819,7 +820,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
         DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
         assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
 
-        if (!AllocatableSet.test(AntiDepReg)) {
+        if (!RegClassInfo.isAllocatable(AntiDepReg)) {
           // Don't break anti-dependencies on non-allocatable registers.
           DEBUG(dbgs() << " (non-allocatable)\n");
           continue;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index b7ddafc32980..706778485429 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,13 +23,15 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include <map>
 
 namespace llvm {
+class RegisterClassInfo;
+
   /// Class AggressiveAntiDepState
   /// Contains all the state necessary for anti-dep breaking.
   class AggressiveAntiDepState {
@@ -117,11 +119,7 @@ namespace llvm {
     MachineRegisterInfo &MRI;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
-
-    /// AllocatableSet - The set of allocatable registers.
-    /// We'll be ignoring anti-dependencies on non-allocatable registers,
-    /// because they may not be safe to break.
-    const BitVector AllocatableSet;
+    const RegisterClassInfo &RegClassInfo;
 
     /// CriticalPathSet - The set of registers that should only be
     /// renamed if they are on the critical path.
@@ -133,7 +131,8 @@ namespace llvm {
 
   public:
     AggressiveAntiDepBreaker(MachineFunction& MFi,
-                             TargetSubtarget::RegClassVector& CriticalPathRCs);
+                          const RegisterClassInfo &RCI,
+                          TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
     ~AggressiveAntiDepBreaker();
 
     /// Start - Initialize anti-dep breaking for a new basic block.
@@ -158,8 +157,8 @@ namespace llvm {
     void FinishBlock();
 
   private:
-    typedef std::map<const TargetRegisterClass *,
-                     TargetRegisterClass::const_iterator> RenameOrderType;
+    /// Keep track of a position in the allocation order for each regclass.
+    typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
 
     /// IsImplicitDefUse - Return true if MO represents a register
     /// that is both implicitly used and defined in MI
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index a8ee2b6357c3..1005f102bea6 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -41,21 +41,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
   if (HintPair.first) {
     const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
     // The remaining allocation order may depend on the hint.
-    const unsigned *B, *E;
-    tie(B, E) = TRI.getAllocationOrder(RC, HintPair.first, Hint,
-                                       VRM.getMachineFunction());
-
-    // Empty allocation order?
-    if (B == E)
+    ArrayRef<unsigned> Order =
+      TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
+                                VRM.getMachineFunction());
+    if (Order.empty())
       return;
 
     // Copy the allocation order with reserved registers removed.
     OwnedBegin = true;
-    unsigned *P = new unsigned[E - B];
+    unsigned *P = new unsigned[Order.size()];
     Begin = P;
-    for (; B != E; ++B)
-      if (!RCI.isReserved(*B))
-        *P++ = *B;
+    for (unsigned i = 0; i != Order.size(); ++i)
+      if (!RCI.isReserved(Order[i]))
+        *P++ = Order[i];
     End = P;
 
     // Target-dependent hints require resolution.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 161afbafb57b..7f314eed3ae6 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -575,6 +575,8 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
     }
   } else if (MI->getOperand(0).isImm()) {
     OS << MI->getOperand(0).getImm();
+  } else if (MI->getOperand(0).isCImm()) {
+    MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
   } else {
     assert(MI->getOperand(0).isReg() && "Unknown operand type");
     if (MI->getOperand(0).getReg() == 0) {
@@ -1211,9 +1213,9 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
 /// global in the specified llvm.used list for which emitUsedDirectiveFor
 /// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
   // Should be an array of 'i8*'.
-  ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
   if (InitList == 0) return;
 
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1226,11 +1228,11 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
 
 /// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the
 /// function pointers, ignoring the init priority.
-void AsmPrinter::EmitXXStructorList(Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
   // Should be an array of '{ int, void ()* }' structs.  The first value is the
   // init priority, which we ignore.
   if (!isa<ConstantArray>(List)) return;
-  ConstantArray *InitList = cast<ConstantArray>(List);
+  const ConstantArray *InitList = cast<ConstantArray>(List);
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
       if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
@@ -1516,6 +1518,13 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
                                      unsigned AddrSpace, AsmPrinter &AP) {
   for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
     EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+  const TargetData &TD = *AP.TM.getTargetData();
+  unsigned Size = TD.getTypeAllocSize(CV->getType());
+  unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+                         CV->getType()->getNumElements();
+  if (unsigned Padding = Size - EmittedSize)
+    AP.OutStreamer.EmitZeros(Padding, AddrSpace);
 }
 
 static void EmitGlobalConstantStruct(const ConstantStruct *CS,
@@ -1925,7 +1934,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
     return false;
 
   // The predecessor has to be immediately before this block.
-  const MachineBasicBlock *Pred = *PI;
+  MachineBasicBlock *Pred = *PI;
 
   if (!Pred->isLayoutSuccessor(MBB))
     return false;
@@ -1934,9 +1943,28 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
   if (Pred->empty())
     return true;
 
-  // Otherwise, check the last instruction.
-  const MachineInstr &LastInst = Pred->back();
-  return !LastInst.getDesc().isBarrier();
+  // Check the terminators in the predecessor block.
+  for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+         IE = Pred->end(); II != IE; ++II) {
+    MachineInstr &MI = *II;
+
+    // If it is not a simple branch, we are in a table somewhere.
+    if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+      return false;
+
+    // If we are the operands of one of the branches, this is not
+    // a fall through.
+    for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+           OE = MI.operands_end(); OI != OE; ++OI) {
+      const MachineOperand& OP = *OI;
+      if (OP.isJTI())
+        return false;
+      if (OP.isMBB() && OP.getMBB() == MBB)
+        return false;
+    }
+  }
+
+  return true;
 }
 
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c6166e2365a5..5ac455e1a1a1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetAsmParser.h"
 #include "llvm/Target/TargetMachine.h"
@@ -111,7 +112,16 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
   OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
                                                   OutContext, OutStreamer,
                                                   *MAI));
-  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
+
+  // FIXME: It would be nice if we could avoid creating a new instance of
+  // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+  // we have to watch out for asm directives which can change subtarget
+  // state. e.g. .code 16, .code 32.
+  OwningPtr<MCSubtargetInfo>
+    STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+                                             TM.getTargetCPU(),
+                                             TM.getTargetFeatureString()));
+  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
   if (!TAP)
     report_fatal_error("Inline asm not supported by this streamer because"
                        " we don't have an asm parser for this target\n");
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index bff1a3529984..1fe035efde3e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -491,7 +491,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
 }
 
 /// addConstantValue - Add constant value entry in variable DIE.
-bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI,
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
                                    bool Unsigned) {
   unsigned CIBitWidth = CI->getBitWidth();
   if (CIBitWidth <= 64) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 60a9b2872b41..213c7fc630d3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -181,7 +181,7 @@ public:
 
   /// addConstantValue - Add constant value entry in variable DIE.
   bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
-  bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
+  bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
 
   /// addConstantFPValue - Add constant value entry in variable DIE.
   bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8845bfac5e8d..125e1e86b12f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -229,6 +229,7 @@ public:
 void DbgScope::dump() const {
   raw_ostream &err = dbgs();
   err.indent(IndentLevel);
+  err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
   const MDNode *N = Desc;
   N->dump();
   if (AbstractScope)
@@ -618,6 +619,21 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   return ScopeDIE;
 }
 
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+  DIDerivedType DTy(Ty);
+  if (DTy.Verify())
+    return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+  DIBasicType BTy(Ty);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}
 
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
 DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
@@ -718,6 +734,11 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
       else if (DVInsn->getOperand(0).isFPImm())
         updated =
           VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+      else if (DVInsn->getOperand(0).isCImm())
+        updated =
+          VariableCU->addConstantValue(VariableDie, 
+                                       DVInsn->getOperand(0).getCImm(),
+                                       isUnsignedDIType(DV->getType()));
     } else {
       VariableCU->addVariableAddress(DV, VariableDie, 
                                      Asm->getDebugValueLocation(DVInsn));
@@ -913,22 +934,6 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
   return I->second;
 }
 
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DIType Ty) {
-  DIDerivedType DTy(Ty);
-  if (DTy.Verify())
-    return isUnsignedDIType(DTy.getTypeDerivedFrom());
-
-  DIBasicType BTy(Ty);
-  if (BTy.Verify()) {
-    unsigned Encoding = BTy.getEncoding();
-    if (Encoding == dwarf::DW_ATE_unsigned ||
-        Encoding == dwarf::DW_ATE_unsigned_char)
-      return true;
-  }
-  return false;
-}
-
 // Return const exprssion if value is a GEP to access merged global
 // constant. e.g.
 // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1017,7 +1022,7 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
     } else {
       TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
     } 
-  } else if (ConstantInt *CI = 
+  } else if (const ConstantInt *CI = 
              dyn_cast_or_null<ConstantInt>(GV.getConstant()))
     TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
   else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
@@ -1310,7 +1315,6 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
 void
 DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
                                    SmallPtrSet<const MDNode *, 16> &Processed) {
-  const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
   MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
   for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
          VE = VMap.end(); VI != VE; ++VI) {
@@ -1320,11 +1324,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
     DIVariable DV(Var);
     const std::pair<unsigned, DebugLoc> &VP = VI->second;
 
-    DbgScope *Scope = 0;
-    if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
-      Scope = ConcreteScopes.lookup(IA);
-    if (Scope == 0)
-      Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+    DbgScope *Scope = findDbgScope(VP.second);
 
     // If variable scope is not found then skip this variable.
     if (Scope == 0)
@@ -1351,6 +1351,34 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
          MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
 }
 
+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, 
+                                         const MCSymbol *FLabel, 
+                                         const MCSymbol *SLabel,
+                                         const MachineInstr *MI) {
+  const MDNode *Var =  MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+  if (MI->getNumOperands() != 3) {
+    MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+  }
+  if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+    MachineLocation MLoc;
+    MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+  }
+  if (MI->getOperand(0).isImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+  if (MI->getOperand(0).isFPImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+  if (MI->getOperand(0).isCImm())
+    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+  assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
+  return DotDebugLocEntry();
+}
+
 /// collectVariableInfo - Populate DbgScope entries with variables' info.
 void
 DwarfDebug::collectVariableInfo(const MachineFunction *MF,
@@ -1379,7 +1407,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
         DISubprogram(DV.getContext()).describes(MF->getFunction()))
       Scope = CurrentFnDbgScope;
     else
-      Scope = findDbgScope(MInsn);
+      Scope = findDbgScope(MInsn->getDebugLoc());
     // If variable scope is not found then skip this variable.
     if (!Scope)
       continue;
@@ -1424,6 +1452,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
         SLabel = FunctionEndSym;
       else {
         const MachineInstr *End = HI[1];
+        DEBUG(dbgs() << "DotDebugLoc Pair:\n" 
+              << "\t" << *Begin << "\t" << *End << "\n");
         if (End->isDebugValue())
           SLabel = getLabelBeforeInsn(End);
         else {
@@ -1435,25 +1465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       }
 
       // The value is valid until the next DBG_VALUE or clobber.
-      MachineLocation MLoc;
-      if (Begin->getNumOperands() == 3) {
-        if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) {
-          MLoc.set(Begin->getOperand(0).getReg(), 
-                   Begin->getOperand(1).getImm());
-          DotDebugLocEntries.
-            push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
-        }
-        // FIXME: Handle isFPImm also.
-        else if (Begin->getOperand(0).isImm()) {
-          DotDebugLocEntries.
-            push_back(DotDebugLocEntry(FLabel, SLabel, 
-                                       Begin->getOperand(0).getImm()));
-        }
-      } else {
-        MLoc = Asm->getDebugValueLocation(Begin);
-        DotDebugLocEntries.
-          push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
-      }
+      DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
     }
     DotDebugLocEntries.push_back(DotDebugLocEntry());
   }
@@ -1550,8 +1562,12 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
 }
 
 /// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
-                                          const MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  MDNode *Scope = NULL;
+  MDNode *InlinedAt = NULL;
+  DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+
   if (!InlinedAt) {
     DbgScope *WScope = DbgScopeMap.lookup(Scope);
     if (WScope)
@@ -1560,22 +1576,12 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
     DbgScopeMap.insert(std::make_pair(Scope, WScope));
     if (DIDescriptor(Scope).isLexicalBlock()) {
       DbgScope *Parent =
-        getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
+        getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
       WScope->setParent(Parent);
       Parent->addScope(WScope);
-    }
-
-    if (!WScope->getParent()) {
-      StringRef SPName = DISubprogram(Scope).getLinkageName();
-      // We used to check only for a linkage name, but that fails
-      // since we began omitting the linkage name for private
-      // functions.  The new way is to check for the name in metadata,
-      // but that's not supported in old .ll test cases.  Ergo, we
-      // check both.
-      if (SPName == Asm->MF->getFunction()->getName() ||
-          DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
-        CurrentFnDbgScope = WScope;
-    }
+    } else if (DIDescriptor(Scope).isSubprogram()
+               && DISubprogram(Scope).describes(Asm->MF->getFunction()))
+      CurrentFnDbgScope = WScope;
 
     return WScope;
   }
@@ -1587,37 +1593,14 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
 
   WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
   DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
-  DILocation DL(InlinedAt);
+  InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
   DbgScope *Parent =
-    getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
+    getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
   WScope->setParent(Parent);
   Parent->addScope(WScope);
-
-  ConcreteScopes[InlinedAt] = WScope;
-
   return WScope;
 }
 
-/// hasValidLocation - Return true if debug location entry attached with
-/// machine instruction encodes valid location info.
-static bool hasValidLocation(LLVMContext &Ctx,
-                             const MachineInstr *MInsn,
-                             const MDNode *&Scope, const MDNode *&InlinedAt) {
-  DebugLoc DL = MInsn->getDebugLoc();
-  if (DL.isUnknown()) return false;
-
-  const MDNode *S = DL.getScope(Ctx);
-
-  // There is no need to create another DIE for compile unit. For all
-  // other scopes, create one DbgScope now. This will be translated
-  // into a scope DIE at the end.
-  if (DIScope(S).isCompileUnit()) return false;
-
-  Scope = S;
-  InlinedAt = DL.getInlinedAt(Ctx);
-  return true;
-}
-
 /// calculateDominanceGraph - Calculate dominance graph for DbgScope
 /// hierarchy.
 static void calculateDominanceGraph(DbgScope *Scope) {
@@ -1648,21 +1631,24 @@ static void calculateDominanceGraph(DbgScope *Scope) {
 
 /// printDbgScopeInfo - Print DbgScope info for each machine instruction.
 static
-void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+void printDbgScopeInfo(const MachineFunction *MF,
                        DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
 {
 #ifndef NDEBUG
+  LLVMContext &Ctx = MF->getFunction()->getContext();
   unsigned PrevDFSIn = 0;
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      const MDNode *Scope = NULL;
-      const MDNode *InlinedAt = NULL;
+      MDNode *Scope = NULL;
+      MDNode *InlinedAt = NULL;
 
       // Check if instruction has valid location information.
-      if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+      DebugLoc MIDL = MInsn->getDebugLoc();
+      if (!MIDL.isUnknown()) {
+        MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
         dbgs() << " [ ";
         if (InlinedAt)
           dbgs() << "*";
@@ -1692,11 +1678,9 @@ bool DwarfDebug::extractScopeInformation() {
     return false;
 
   // Scan each instruction and create scopes. First build working set of scopes.
-  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
   SmallVector<DbgRange, 4> MIRanges;
   DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
-  const MDNode *PrevScope = NULL;
-  const MDNode *PrevInlinedAt = NULL;
+  DebugLoc PrevDL;
   const MachineInstr *RangeBeginMI = NULL;
   const MachineInstr *PrevMI = NULL;
   for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
@@ -1704,17 +1688,16 @@ bool DwarfDebug::extractScopeInformation() {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
-      const MDNode *Scope = NULL;
-      const MDNode *InlinedAt = NULL;
 
       // Check if instruction has valid location information.
-      if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+      const DebugLoc MIDL = MInsn->getDebugLoc();
+      if (MIDL.isUnknown()) {
         PrevMI = MInsn;
         continue;
       }
 
       // If scope has not changed then skip this instruction.
-      if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+      if (MIDL == PrevDL) {
         PrevMI = MInsn;
         continue;
       }
@@ -1727,9 +1710,13 @@ bool DwarfDebug::extractScopeInformation() {
         // If we have alread seen a beginning of a instruction range and
         // current instruction scope does not match scope of first instruction
         // in this range then create a new instruction range.
+        DEBUG(dbgs() << "Creating new instruction range :\n");
+        DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
+        DEBUG(dbgs() << "End Range at " << *PrevMI);
+        DEBUG(dbgs() << "Next Range starting at " << *MInsn);
+        DEBUG(dbgs() << "------------------------\n");
         DbgRange R(RangeBeginMI, PrevMI);
-        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
-                                                        PrevInlinedAt);
+        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
         MIRanges.push_back(R);
       }
 
@@ -1738,16 +1725,15 @@ bool DwarfDebug::extractScopeInformation() {
 
       // Reset previous markers.
       PrevMI = MInsn;
-      PrevScope = Scope;
-      PrevInlinedAt = InlinedAt;
+      PrevDL = MIDL;
     }
   }
 
   // Create last instruction range.
-  if (RangeBeginMI && PrevMI && PrevScope) {
+  if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
     DbgRange R(RangeBeginMI, PrevMI);
     MIRanges.push_back(R);
-    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
   }
 
   if (!CurrentFnDbgScope)
@@ -1755,7 +1741,7 @@ bool DwarfDebug::extractScopeInformation() {
 
   calculateDominanceGraph(CurrentFnDbgScope);
   if (PrintDbgScope)
-    printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+    printDbgScopeInfo(Asm->MF, MI2ScopeMap);
 
   // Find ranges of instructions covered by each DbgScope;
   DbgScope *PrevDbgScope = NULL;
@@ -1842,8 +1828,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 
   assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
 
-  /// ProcessedArgs - Collection of arguments already processed.
-  SmallPtrSet<const MDNode *, 8> ProcessedArgs;
   const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
   /// LiveUserVar - Map physreg numbers to the MDNode they contain.
   std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
@@ -1883,8 +1867,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
           if (Prev->isDebugValue()) {
             // Coalesce identical entries at the end of History.
             if (History.size() >= 2 &&
-                Prev->isIdenticalTo(History[History.size() - 2]))
+                Prev->isIdenticalTo(History[History.size() - 2])) {
+              DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+                    << "\t" << *Prev 
+                    << "\t" << *History[History.size() - 2] << "\n");
               History.pop_back();
+            }
 
             // Terminate old register assignments that don't reach MI;
             MachineFunction::const_iterator PrevMBB = Prev->getParent();
@@ -1894,9 +1882,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
               // its basic block.
               MachineBasicBlock::const_iterator LastMI =
                 PrevMBB->getLastNonDebugInstr();
-              if (LastMI == PrevMBB->end())
+              if (LastMI == PrevMBB->end()) {
                 // Drop DBG_VALUE for empty range.
+                DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+                      << "\t" << *Prev << "\n");
                 History.pop_back();
+              }
               else {
                 // Terminate after LastMI.
                 History.push_back(LastMI);
@@ -2053,10 +2044,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   DbgVariableToFrameIndexMap.clear();
   VarToAbstractVarMap.clear();
   DbgVariableToDbgInstMap.clear();
+  InlinedDbgScopeMap.clear();
   DeleteContainerSeconds(DbgScopeMap);
   UserVariables.clear();
   DbgValues.clear();
-  ConcreteScopes.clear();
   DeleteContainerSeconds(AbstractScopes);
   AbstractScopesList.clear();
   AbstractVariables.clear();
@@ -2083,22 +2074,17 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
   return true;
 }
 
-/// findDbgScope - Find DbgScope for the debug loc attached with an
-/// instruction.
-DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
-  DbgScope *Scope = NULL;
-  LLVMContext &Ctx =
-    MInsn->getParent()->getParent()->getFunction()->getContext();
-  DebugLoc DL = MInsn->getDebugLoc();
-
+/// findDbgScope - Find DbgScope for the debug loc.
+DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
   if (DL.isUnknown())
-    return Scope;
+    return NULL;
 
-  if (const MDNode *IA = DL.getInlinedAt(Ctx))
-    Scope = ConcreteScopes.lookup(IA);
-  if (Scope == 0)
+  DbgScope *Scope = NULL;
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  if (MDNode *IA = DL.getInlinedAt(Ctx))
+    Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+  else
     Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
   return Scope;
 }
 
@@ -2597,56 +2583,61 @@ void DwarfDebug::emitDebugLoc() {
       MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
       Asm->EmitLabelDifference(end, begin, 2);
       Asm->OutStreamer.EmitLabel(begin);
-      if (Entry.isConstant()) {
+      if (Entry.isInt()) {
         DIBasicType BTy(DV.getType());
         if (BTy.Verify() &&
             (BTy.getEncoding()  == dwarf::DW_ATE_signed 
              || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
           Asm->OutStreamer.AddComment("DW_OP_consts");
           Asm->EmitInt8(dwarf::DW_OP_consts);
-          Asm->EmitSLEB128(Entry.getConstant());
+          Asm->EmitSLEB128(Entry.getInt());
         } else {
           Asm->OutStreamer.AddComment("DW_OP_constu");
           Asm->EmitInt8(dwarf::DW_OP_constu);
-          Asm->EmitULEB128(Entry.getConstant());
+          Asm->EmitULEB128(Entry.getInt());
         }
-      } else if (DV.hasComplexAddress()) {
-        unsigned N = DV.getNumAddrElements();
-        unsigned i = 0;
-        if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
-          if (Entry.Loc.getOffset()) {
-            i = 2;
-            Asm->EmitDwarfRegOp(Entry.Loc);
-            Asm->OutStreamer.AddComment("DW_OP_deref");
-            Asm->EmitInt8(dwarf::DW_OP_deref);
-            Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
-            Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
-            Asm->EmitSLEB128(DV.getAddrElement(1));
+      } else if (Entry.isLocation()) {
+        if (!DV.hasComplexAddress()) 
+          // Regular entry.
+          Asm->EmitDwarfRegOp(Entry.Loc);
+        else {
+          // Complex address entry.
+          unsigned N = DV.getNumAddrElements();
+          unsigned i = 0;
+          if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+            if (Entry.Loc.getOffset()) {
+              i = 2;
+              Asm->EmitDwarfRegOp(Entry.Loc);
+              Asm->OutStreamer.AddComment("DW_OP_deref");
+              Asm->EmitInt8(dwarf::DW_OP_deref);
+              Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+              Asm->EmitSLEB128(DV.getAddrElement(1));
+            } else {
+              // If first address element is OpPlus then emit
+              // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+              MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+              Asm->EmitDwarfRegOp(Loc);
+              i = 2;
+            }
           } else {
-            // If first address element is OpPlus then emit
-            // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
-            MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
-            Asm->EmitDwarfRegOp(Loc);
-            i = 2;
+            Asm->EmitDwarfRegOp(Entry.Loc);
+          }
+          
+          // Emit remaining complex address elements.
+          for (; i < N; ++i) {
+            uint64_t Element = DV.getAddrElement(i);
+            if (Element == DIBuilder::OpPlus) {
+              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+              Asm->EmitULEB128(DV.getAddrElement(++i));
+            } else if (Element == DIBuilder::OpDeref)
+              Asm->EmitInt8(dwarf::DW_OP_deref);
+            else llvm_unreachable("unknown Opcode found in complex address");
           }
-        } else {
-          Asm->EmitDwarfRegOp(Entry.Loc);
-        }
-
-        // Emit remaining complex address elements.
-        for (; i < N; ++i) {
-          uint64_t Element = DV.getAddrElement(i);
-          if (Element == DIBuilder::OpPlus) {
-            Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
-            Asm->EmitULEB128(DV.getAddrElement(++i));
-          } else if (Element == DIBuilder::OpDeref)
-            Asm->EmitInt8(dwarf::DW_OP_deref);
-          else llvm_unreachable("unknown Opcode found in complex address");
         }
-      } else {
-        // Regular entry.
-        Asm->EmitDwarfRegOp(Entry.Loc);
       }
+      // else ... ignore constant fp. There is not any good way to
+      // represent them here in dwarf.
       Asm->OutStreamer.EmitLabel(end);
     }
   }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abda2e61d31e..b2450064e3d0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -69,17 +69,35 @@ typedef struct DotDebugLocEntry {
   const MDNode *Variable;
   bool Merged;
   bool Constant;
-  int64_t iConstant;
+  enum EntryType {
+    E_Location,
+    E_Integer,
+    E_ConstantFP,
+    E_ConstantInt
+  };
+  enum EntryType EntryKind;
+
+  union {
+    int64_t Int;
+    const ConstantFP *CFP;
+    const ConstantInt *CIP;
+  } Constants;
   DotDebugLocEntry() 
     : Begin(0), End(0), Variable(0), Merged(false), 
-      Constant(false), iConstant(0) {}
+      Constant(false) { Constants.Int = 0;}
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
                    const MDNode *V) 
     : Begin(B), End(E), Loc(L), Variable(V), Merged(false), 
-      Constant(false), iConstant(0) {}
+      Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
   DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
     : Begin(B), End(E), Variable(0), Merged(false), 
-      Constant(true), iConstant(i) {}
+      Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+    : Begin(B), End(E), Variable(0), Merged(false), 
+      Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+    : Begin(B), End(E), Variable(0), Merged(false), 
+      Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
 
   /// Empty entries are also used as a trigger to emit temp label. Such
   /// labels are referenced is used to find debug_loc offset for a given DIE.
@@ -91,8 +109,13 @@ typedef struct DotDebugLocEntry {
     Next->Begin = Begin;
     Merged = true;
   }
-  bool isConstant() { return Constant; }
-  int64_t getConstant() { return iConstant; }
+  bool isLocation() const    { return EntryKind == E_Location; }
+  bool isInt() const         { return EntryKind == E_Integer; }
+  bool isConstantFP() const  { return EntryKind == E_ConstantFP; }
+  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+  int64_t getInt()                    { return Constants.Int; }
+  const ConstantFP *getConstantFP()   { return Constants.CFP; }
+  const ConstantInt *getConstantInt() { return Constants.CIP; }
 } DotDebugLocEntry;
 
 //===----------------------------------------------------------------------===//
@@ -178,12 +201,10 @@ class DwarfDebug {
 
   /// DbgScopeMap - Tracks the scopes in the current function.  Owns the
   /// contained DbgScope*s.
-  ///
   DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
 
-  /// ConcreteScopes - Tracks the concrete scopees in the current function.
-  /// These scopes are also included in DbgScopeMap.
-  DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
+  /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
+  DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;
 
   /// AbstractScopes - Tracks the abstract scopes a module. These scopes are
   /// not included DbgScopeMap.  AbstractScopes owns its DbgScope*s.
@@ -296,7 +317,7 @@ private:
   void assignAbbrevNumber(DIEAbbrev &Abbrev);
 
   /// getOrCreateDbgScope - Create DbgScope for the scope.
-  DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
+  DbgScope *getOrCreateDbgScope(DebugLoc DL);
 
   DbgScope *getOrCreateAbstractScope(const MDNode *N);
 
@@ -427,9 +448,8 @@ private:
   /// is found. Update FI to hold value of the index.
   bool findVariableFrameIndex(const DbgVariable *V, int *FI);
 
-  /// findDbgScope - Find DbgScope for the debug loc attached with an 
-  /// instruction.
-  DbgScope *findDbgScope(const MachineInstr *MI);
+  /// findDbgScope - Find DbgScope for the debug loc.
+  DbgScope *findDbgScope(DebugLoc DL);
 
   /// identifyScopeMarkers() - Indentify instructions that are marking
   /// beginning of or end of a scope.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 967a2783da14..1f992faaadb5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -512,6 +512,8 @@ void DwarfException::EmitExceptionTable() {
     SizeAlign = 0;
   }
 
+  bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
   // SjLj Exception handling
   if (IsSJLJ) {
     Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
@@ -525,14 +527,30 @@ void DwarfException::EmitExceptionTable() {
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
       const CallSiteEntry &S = *I;
 
+      if (VerboseAsm) {
+        // Emit comments that decode the call site.
+        Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+                                    llvm::utostr(idx) + " <<");
+        Asm->OutStreamer.AddComment(Twine("  On exception at call site ") +
+                                    llvm::utostr(idx));
+
+        if (S.Action == 0)
+          Asm->OutStreamer.AddComment("  Action: cleanup");
+        else
+          Asm->OutStreamer.AddComment(Twine("  Action: ") +
+                                      llvm::utostr((S.Action - 1) / 2 + 1));
+
+        Asm->OutStreamer.AddBlankLine();
+      }
+
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
-      Asm->EmitULEB128(idx, "Landing pad");
+      Asm->EmitULEB128(idx);
 
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
       // the action table), and 0 indicates that there are no actions.
-      Asm->EmitULEB128(S.Action, "Action");
+      Asm->EmitULEB128(S.Action);
     }
   } else {
     // DWARF Exception handling
@@ -562,6 +580,7 @@ void DwarfException::EmitExceptionTable() {
     // Add extra padding if it wasn't added to the TType base offset.
     Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
 
+    unsigned Entry = 0;
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
       const CallSiteEntry &S = *I;
@@ -576,19 +595,38 @@ void DwarfException::EmitExceptionTable() {
       if (EndLabel == 0)
         EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
 
+      if (VerboseAsm) {
+        // Emit comments that decode the call site.
+        Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+                                    llvm::utostr(++Entry) + " <<");
+        Asm->OutStreamer.AddComment(Twine("  Call between ") +
+                                    BeginLabel->getName() + " and " +
+                                    EndLabel->getName());
+
+        if (!S.PadLabel) {
+          Asm->OutStreamer.AddComment("    has no landing pad");
+        } else {
+          Asm->OutStreamer.AddComment(Twine("    jumps to ") +
+                                      S.PadLabel->getName());
+
+          if (S.Action == 0)
+            Asm->OutStreamer.AddComment("  On action: cleanup");
+          else
+            Asm->OutStreamer.AddComment(Twine("  On action: ") +
+                                        llvm::utostr((S.Action - 1) / 2 + 1));
+        }
+
+        Asm->OutStreamer.AddBlankLine();
+      }
+
       // Offset of the call site relative to the previous call site, counted in
       // number of 16-byte bundles. The first call site is counted relative to
       // the start of the procedure fragment.
-      Asm->OutStreamer.AddComment("Region start");
       Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-
-      Asm->OutStreamer.AddComment("Region length");
       Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
 
-
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
-      Asm->OutStreamer.AddComment("Landing pad");
       if (!S.PadLabel)
         Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
       else
@@ -597,45 +635,63 @@ void DwarfException::EmitExceptionTable() {
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
       // the action table), and 0 indicates that there are no actions.
-      Asm->EmitULEB128(S.Action, "Action");
+      Asm->EmitULEB128(S.Action);
     }
   }
 
   // Emit the Action Table.
-  if (Actions.size() != 0) {
-    Asm->OutStreamer.AddComment("-- Action Record Table --");
-    Asm->OutStreamer.AddBlankLine();
-  }
-
+  int Entry = 0;
   for (SmallVectorImpl<ActionEntry>::const_iterator
          I = Actions.begin(), E = Actions.end(); I != E; ++I) {
     const ActionEntry &Action = *I;
-    Asm->OutStreamer.AddComment("Action Record");
-    Asm->OutStreamer.AddBlankLine();
+
+    if (VerboseAsm) {
+      // Emit comments that decode the action table.
+      Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+                                  llvm::utostr(++Entry) + " <<");
+      if (Action.ValueForTypeID >= 0)
+        Asm->OutStreamer.AddComment(Twine("  Catch TypeInfo ") +
+                                    llvm::itostr(Action.ValueForTypeID));
+      else 
+        Asm->OutStreamer.AddComment(Twine("  Filter TypeInfo ") +
+                                    llvm::itostr(Action.ValueForTypeID));
+
+      if (Action.NextAction == 0) {
+        Asm->OutStreamer.AddComment("  No further actions");
+      } else {
+        unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+        Asm->OutStreamer.AddComment(Twine("  Continue to action ") +
+                                    llvm::utostr(NextAction));
+      }
+
+      Asm->OutStreamer.AddBlankLine();
+    }
 
     // Type Filter
     //
     //   Used by the runtime to match the type of the thrown exception to the
     //   type of the catch clauses or the types in the exception specification.
-    Asm->EmitSLEB128(Action.ValueForTypeID, "  TypeInfo index");
+    Asm->EmitSLEB128(Action.ValueForTypeID);
 
     // Action Record
     //
     //   Self-relative signed displacement in bytes of the next action record,
     //   or 0 if there is no next action record.
-    Asm->EmitSLEB128(Action.NextAction, "  Next action");
+    Asm->EmitSLEB128(Action.NextAction);
   }
 
   // Emit the Catch TypeInfos.
-  if (!TypeInfos.empty()) {
-    Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+  if (VerboseAsm && !TypeInfos.empty()) {
+    Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
     Asm->OutStreamer.AddBlankLine();
+    Entry = TypeInfos.size();
   }
+
   for (std::vector<const GlobalVariable *>::const_reverse_iterator
          I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
     const GlobalVariable *GV = *I;
-
-    Asm->OutStreamer.AddComment("TypeInfo");
+    if (VerboseAsm)
+      Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
     if (GV)
       Asm->EmitReference(GV, TTypeEncoding);
     else
@@ -644,14 +700,21 @@ void DwarfException::EmitExceptionTable() {
   }
 
   // Emit the Exception Specifications.
-  if (!FilterIds.empty()) {
-    Asm->OutStreamer.AddComment("-- Filter IDs --");
+  if (VerboseAsm && !FilterIds.empty()) {
+    Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
     Asm->OutStreamer.AddBlankLine();
+    Entry = 0;
   }
   for (std::vector<unsigned>::const_iterator
          I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
     unsigned TypeID = *I;
-    Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+    if (VerboseAsm) {
+      --Entry;
+      if (TypeID != 0)
+        Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+    }
+
+    Asm->EmitULEB128(TypeID);
   }
 
   Asm->EmitAlignment(2);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 719cd264f684..99090a8269d4 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -108,6 +108,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
   while (!MBB->succ_empty())
     MBB->removeSuccessor(MBB->succ_end()-1);
 
+  // Avoid matching if this pointer gets reused.
+  TriedMerging.erase(MBB);
+
   // Remove the block.
   MF->erase(MBB);
 }
@@ -171,6 +174,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
                                     MachineModuleInfo *mmi) {
   if (!tii) return false;
 
+  TriedMerging.clear();
+
   TII = tii;
   TRI = tri;
   MMI = mmi;
@@ -361,11 +366,31 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
   return TailLen;
 }
 
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+                                   MachineBasicBlock *NewMBB) {
+  if (RS) {
+    RS->enterBasicBlock(CurMBB);
+    if (!CurMBB->empty())
+      RS->forward(prior(CurMBB->end()));
+    BitVector RegsLiveAtExit(TRI->getNumRegs());
+    RS->getRegsUsed(RegsLiveAtExit, false);
+    for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+      if (RegsLiveAtExit[i])
+        NewMBB->addLiveIn(i);
+  }
+}
+
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
 /// after it, replacing it with an unconditional branch to NewDest.
 void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                            MachineBasicBlock *NewDest) {
+  MachineBasicBlock *CurMBB = OldInst->getParent();
+
   TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+  // For targets that use the register scavenger, we must maintain LiveIns.
+  MaintainLiveIns(CurMBB, NewDest);
+
   ++NumTailMerge;
 }
 
@@ -394,16 +419,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
   NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
 
   // For targets that use the register scavenger, we must maintain LiveIns.
-  if (RS) {
-    RS->enterBasicBlock(&CurMBB);
-    if (!CurMBB.empty())
-      RS->forward(prior(CurMBB.end()));
-    BitVector RegsLiveAtExit(TRI->getNumRegs());
-    RS->getRegsUsed(RegsLiveAtExit, false);
-    for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
-      if (RegsLiveAtExit[i])
-        NewMBB->addLiveIn(i);
-  }
+  MaintainLiveIns(&CurMBB, NewMBB);
 
   return NewMBB;
 }
@@ -416,10 +432,10 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
   for (; I != E; ++I) {
     if (I->isDebugValue())
       continue;
-    const TargetInstrDesc &TID = I->getDesc();
-    if (TID.isCall())
+    const MCInstrDesc &MCID = I->getDesc();
+    if (MCID.isCall())
       Time += 10;
-    else if (TID.mayLoad() || TID.mayStore())
+    else if (MCID.mayLoad() || MCID.mayStore())
       Time += 2;
     else
       ++Time;
@@ -799,14 +815,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
 
   // First find blocks with no successors.
   MergePotentials.clear();
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+    if (TriedMerging.count(I))
+      continue;
     if (I->succ_empty())
       MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
   }
 
+  // If this is a large problem, avoid visiting the same basic blocks
+  // multiple times.
+  if (MergePotentials.size() == TailMergeThreshold)
+    for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+      TriedMerging.insert(MergePotentials[i].getBlock());
   // See if we can do any tail merging on those.
-  if (MergePotentials.size() < TailMergeThreshold &&
-      MergePotentials.size() >= 2)
+  if (MergePotentials.size() >= 2)
     MadeChange |= TryTailMergeBlocks(NULL, NULL);
 
   // Look at blocks (IBB) with multiple predecessors (PBB).
@@ -830,15 +853,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
 
   for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
        I != E; ++I) {
-    if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+    if (I->pred_size() >= 2) {
       SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
       MachineBasicBlock *IBB = I;
       MachineBasicBlock *PredBB = prior(I);
       MergePotentials.clear();
       for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
                                             E2 = I->pred_end();
-           P != E2; ++P) {
+           P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
         MachineBasicBlock *PBB = *P;
+        if (TriedMerging.count(PBB))
+          continue;
         // Skip blocks that loop to themselves, can't tail merge these.
         if (PBB == IBB)
           continue;
@@ -891,6 +916,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
           MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
         }
       }
+      // If this is a large problem, avoid visiting the same basic blocks
+      // multiple times.
+      if (MergePotentials.size() == TailMergeThreshold)
+        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+          TriedMerging.insert(MergePotentials[i].getBlock());
       if (MergePotentials.size() >= 2)
         MadeChange |= TryTailMergeBlocks(IBB, PredBB);
       // Reinsert an unconditional branch if needed.
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4daf4ecfe599..df795dfc248e 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
 #define LLVM_CODEGEN_BRANCHFOLDING_HPP
 
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include <vector>
 
@@ -47,6 +48,7 @@ namespace llvm {
     };
     typedef std::vector<MergePotentialsElt>::iterator MPIterator;
     std::vector<MergePotentialsElt> MergePotentials;
+    SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
 
     class SameTailElt {
       MPIterator MPIter;
@@ -93,6 +95,8 @@ namespace llvm {
     bool TailMergeBlocks(MachineFunction &MF);
     bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
                        MachineBasicBlock* PredBB);
+    void MaintainLiveIns(MachineBasicBlock *CurMBB,
+                         MachineBasicBlock *NewMBB);
     void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
                                  MachineBasicBlock *NewDest);
     MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index c726d924d281..06d2a959ac84 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -33,6 +33,8 @@ add_llvm_library(LLVMCodeGen
   LocalStackSlotAllocation.cpp
   LowerSubregs.cpp
   MachineBasicBlock.cpp
+  MachineBlockFrequency.cpp
+  MachineBranchProbabilityInfo.cpp
   MachineCSE.cpp
   MachineDominators.cpp
   MachineFunction.cpp
@@ -58,7 +60,6 @@ add_llvm_library(LLVMCodeGen
   Passes.cpp
   PeepholeOptimizer.cpp
   PostRASchedulerList.cpp
-  PreAllocSplitting.cpp
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
   PseudoSourceValue.cpp
@@ -78,7 +79,6 @@ add_llvm_library(LLVMCodeGen
   ScoreboardHazardRecognizer.cpp
   ShadowStackGC.cpp
   ShrinkWrapping.cpp
-  SimpleRegisterCoalescing.cpp
   SjLjEHPrepare.cpp
   SlotIndexes.cpp
   Spiller.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 5d722ee34f7e..e6b3bbca2068 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -188,6 +188,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
 
 void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
   const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
   const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
   const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
@@ -202,8 +203,11 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
     // TRI doesn't have accurate enough information to model this yet.
     if (I.getOperand().getSubReg())
       return;
+    // Inline asm instructions don't remember their constraints.
+    if (I->isInlineAsm())
+      return;
     const TargetRegisterClass *OpRC =
-      I->getDesc().getRegClass(I.getOperandNo(), TRI);
+      TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
     if (OpRC)
       NewRC = getCommonSubClass(NewRC, OpRC);
     if (!NewRC || NewRC == OldRC)
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 515e6f9fde87..489746cf3c72 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -37,13 +37,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeOptimizePHIsPass(Registry);
   initializePHIEliminationPass(Registry);
   initializePeepholeOptimizerPass(Registry);
-  initializePreAllocSplittingPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
   initializePEIPass(Registry);
   initializeRALinScanPass(Registry);
-  initializeRegisterCoalescerAnalysisGroup(Registry);
+  initializeRegisterCoalescerPass(Registry);
   initializeRenderMachineFunctionPass(Registry);
-  initializeSimpleRegisterCoalescingPass(Registry);
   initializeSlotIndexesPass(Registry);
   initializeLoopSplitterPass(Registry);
   initializeStackProtectorPass(Registry);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 51d984ffac0f..84c4d59c0e41 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -27,12 +27,12 @@
 using namespace llvm;
 
 CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi) :
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
   AntiDepBreaker(), MF(MFi),
   MRI(MF.getRegInfo()),
   TII(MF.getTarget().getInstrInfo()),
   TRI(MF.getTarget().getRegisterInfo()),
-  AllocatableSet(TRI->getAllocatableSet(MF)),
+  RegClassInfo(RCI),
   Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
   KillIndices(TRI->getNumRegs(), 0),
   DefIndices(TRI->getNumRegs(), 0) {}
@@ -207,7 +207,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
     const TargetRegisterClass *NewRC = 0;
 
     if (i < MI->getDesc().getNumOperands())
-      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+      NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -295,7 +295,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 
     const TargetRegisterClass *NewRC = 0;
     if (i < MI->getDesc().getNumOperands())
-      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+      NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -385,11 +385,9 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
                                                  unsigned LastNewReg,
                                                  const TargetRegisterClass *RC)
 {
-  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
-       RE = RC->allocation_order_end(MF); R != RE; ++R) {
-    unsigned NewReg = *R;
-    // Don't consider non-allocatable registers
-    if (!AllocatableSet.test(NewReg)) continue;
+  ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+  for (unsigned i = 0; i != Order.size(); ++i) {
+    unsigned NewReg = Order[i];
     // Don't replace a register with itself.
     if (NewReg == AntiDepReg) continue;
     // Don't replace a register with one that was recently used to repair
@@ -534,7 +532,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
         if (Edge->getKind() == SDep::Anti) {
           AntiDepReg = Edge->getReg();
           assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
-          if (!AllocatableSet.test(AntiDepReg))
+          if (!RegClassInfo.isAllocatable(AntiDepReg))
             // Don't break anti-dependencies on non-allocatable registers.
             AntiDepReg = 0;
           else if (KeepRegs.count(AntiDepReg))
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 5bbb8f525f54..07107802972d 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,6 +17,7 @@
 #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
 
 #include "AntiDepBreaker.h"
+#include "RegisterClassInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -27,6 +28,7 @@
 #include <map>
 
 namespace llvm {
+class RegisterClassInfo;
 class TargetInstrInfo;
 class TargetRegisterInfo;
 
@@ -35,6 +37,7 @@ class TargetRegisterInfo;
     MachineRegisterInfo &MRI;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const RegisterClassInfo &RegClassInfo;
 
     /// AllocatableSet - The set of allocatable registers.
     /// We'll be ignoring anti-dependencies on non-allocatable registers,
@@ -66,7 +69,7 @@ class TargetRegisterInfo;
     SmallSet<unsigned, 4> KeepRegs;
 
   public:
-    CriticalAntiDepBreaker(MachineFunction& MFi);
+    CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
     ~CriticalAntiDepBreaker();
 
     /// Start - Initialize anti-dep breaking for a new basic block.
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index fdc1d9142140..6de6c0cb81bd 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -110,9 +110,14 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
           LivePhysRegs.set(Reg);
       }
 
-    // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs
-    // are not live across blocks, but some targets (x86) can have flags live
-    // out of a block.
+    // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+    // live across blocks, but some targets (x86) can have flags live out of a
+    // block.
+    for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+           E = MBB->succ_end(); S != E; S++)
+      for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+           LI != (*S)->livein_end(); LI++)
+        LivePhysRegs.set(*LI);
 
     // Now scan the instructions and delete dead ones, tracking physreg
     // liveness as we go.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 22c5465bf9fa..03604b0a170f 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -336,8 +336,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
       Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
 
       CallInst *NewSelector =
-        CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
-                         "eh.sel.catch.all", II);
+        CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
 
       NewSelector->setTailCall(II->isTailCall());
       NewSelector->setAttributes(II->getAttributes());
@@ -497,10 +496,8 @@ bool DwarfEHPrepare::LowerUnwindsAndResumes() {
   // Find the rewind function if we didn't already.
   if (!RewindFunction) {
     LLVMContext &Ctx = ResumeInsts[0]->getContext();
-    std::vector<const Type*>
-      Params(1, Type::getInt8PtrTy(Ctx));
     FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
-                                          Params, false);
+                                          Type::getInt8PtrTy(Ctx), false);
     const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
     RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
   }
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index fa2319bff704..d977651c32f7 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -659,11 +659,11 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 
 /// EmitXXStructorList - Emit the ctor or dtor list.  This just emits out the 
 /// function pointers, ignoring the init priority.
-void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
+void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
   // Should be an array of '{ i32, void ()* }' structs.  The first value is the
   // init priority, which we ignore.
   if (List->isNullValue()) return;
-  ConstantArray *InitList = cast<ConstantArray>(List);
+  const ConstantArray *InitList = cast<ConstantArray>(List);
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     if (InitList->getOperand(i)->isNullValue())
       continue;
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index b8bac5598ecf..6f7fbace8aba 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -232,7 +232,7 @@ namespace llvm {
     void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, 
                                   ELFSection &GblS, int64_t Offset = 0);
     bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
-    void EmitXXStructorList(Constant *List, ELFSection &Xtor);
+    void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
     void EmitRelocations();
     void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
     void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index 646e01407a4f..a7aba89b87f3 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -39,7 +39,7 @@ void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
 bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
   EC.clear();
-  EC.grow(2 * MF->size());
+  EC.grow(2 * MF->getNumBlockIDs());
 
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
        ++I) {
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index ebc2fc91efa3..a67140ece4a5 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -62,8 +62,8 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr *MI = MBBI++;
 
       // If MI is a pseudo, expand it.
-      const TargetInstrDesc &TID = MI->getDesc();
-      if (TID.usesCustomInsertionHook()) {
+      const MCInstrDesc &MCID = MI->getDesc();
+      if (MCID.usesCustomInsertionHook()) {
         Changed = true;
         MachineBasicBlock *NewMBB =
           TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 8b2c98161644..6cb22778caf9 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -18,11 +18,12 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -173,10 +174,10 @@ namespace {
   private:
     bool ReverseBranchCondition(BBInfo &BBI);
     bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
-                     float Prediction, float Confidence) const;
+                     const BranchProbability &Prediction) const;
     bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
                        bool FalseBranch, unsigned &Dups,
-                       float Prediction, float Confidence) const;
+                       const BranchProbability &Prediction) const;
     bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
                       unsigned &Dups1, unsigned &Dups2) const;
     void ScanInstructions(BBInfo &BBI);
@@ -203,19 +204,19 @@ namespace {
 
     bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
                             unsigned Cycle, unsigned Extra,
-                            float Prediction, float Confidence) const {
+                            const BranchProbability &Prediction) const {
       return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
-                                                   Prediction, Confidence);
+                                                   Prediction);
     }
 
     bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
                             unsigned TCycle, unsigned TExtra,
                             MachineBasicBlock &FBB,
                             unsigned FCycle, unsigned FExtra,
-                            float Prediction, float Confidence) const {
+                            const BranchProbability &Prediction) const {
       return TCycle > 0 && FCycle > 0 &&
         TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
-                                 Prediction, Confidence);
+                                 Prediction);
     }
 
     // blockAlwaysFallThrough - Block ends without a terminator.
@@ -450,7 +451,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
 /// number of instructions that the ifcvt would need to duplicate if performed
 /// in Dups.
 bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
-                              float Prediction, float Confidence) const {
+                              const BranchProbability &Prediction) const {
   Dups = 0;
   if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
     return false;
@@ -461,7 +462,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
   if (TrueBBI.BB->pred_size() > 1) {
     if (TrueBBI.CannotBeCopied ||
         !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
-                                        Prediction, Confidence))
+                                        Prediction))
       return false;
     Dups = TrueBBI.NonPredSize;
   }
@@ -477,7 +478,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
 /// if performed in 'Dups'.
 bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
                                 bool FalseBranch, unsigned &Dups,
-                                float Prediction, float Confidence) const {
+                                const BranchProbability &Prediction) const {
   Dups = 0;
   if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
     return false;
@@ -499,8 +500,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
           ++Size;
       }
     }
-    if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
-                                        Prediction, Confidence))
+    if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
       return false;
     Dups = Size;
   }
@@ -651,12 +651,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
     if (I->isDebugValue())
       continue;
 
-    const TargetInstrDesc &TID = I->getDesc();
-    if (TID.isNotDuplicable())
+    const MCInstrDesc &MCID = I->getDesc();
+    if (MCID.isNotDuplicable())
       BBI.CannotBeCopied = true;
 
     bool isPredicated = TII->isPredicated(I);
-    bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+    bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
 
     if (!isCondBr) {
       if (!isPredicated) {
@@ -751,8 +751,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 
   ScanInstructions(BBI);
 
-  // Unanalyzable or ends with fallthrough or unconditional branch.
-  if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+  // Unanalyzable or ends with fallthrough or unconditional branch, or the
+  // block is no longer considered for ifcvt.
+  if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
     BBI.IsBeingAnalyzed = false;
     BBI.IsAnalyzed = true;
     return BBI;
@@ -795,21 +796,20 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   //   - backedge -> 90% taken
   //   - early exit -> 20% taken
   //   - branch predictor confidence -> 90%
-  float Prediction = 0.5f;
-  float Confidence = 0.9f;
+  BranchProbability Prediction(5, 10);
   MachineLoop *Loop = MLI->getLoopFor(BB);
   if (Loop) {
     if (TrueBBI.BB == Loop->getHeader())
-      Prediction = 0.9f;
+      Prediction = BranchProbability(9, 10);
     else if (FalseBBI.BB == Loop->getHeader())
-      Prediction = 0.1f;
+      Prediction = BranchProbability(1, 10);
 
     MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
     MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
     if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
-      Prediction = 0.2f;
+      Prediction = BranchProbability(2, 10);
     else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
-      Prediction = 0.8f;
+      Prediction = BranchProbability(8, 10);
   }
   
   if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
@@ -817,7 +817,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
                                        TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
                          *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
                                         FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
-                         Prediction, Confidence) &&
+                         Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
       FeasibilityAnalysis(FalseBBI, RevCond)) {
     // Diamond:
@@ -833,9 +833,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     Enqueued = true;
   }
 
-  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
+  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
     // Triangle:
     //   EBB
@@ -848,17 +848,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     Enqueued = true;
   }
 
-  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
+  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
     Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
     Enqueued = true;
   }
 
-  if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
+  if (ValidSimple(TrueBBI, Dups, Prediction) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
     // Simple (split, no rejoin):
     //   EBB
@@ -874,29 +874,29 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   if (CanRevCond) {
     // Try the other path...
     if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
-                      1.0-Prediction, Confidence) &&
+                      Prediction.getCompl()) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
       Enqueued = true;
     }
 
     if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
-                      1.0-Prediction, Confidence) &&
+                      Prediction.getCompl()) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
       Enqueued = true;
     }
 
-    if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+    if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
         FeasibilityAnalysis(FalseBBI, RevCond)) {
       Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
       Enqueued = true;
@@ -1414,9 +1414,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
 
   for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
          E = FromBBI.BB->end(); I != E; ++I) {
-    const TargetInstrDesc &TID = I->getDesc();
+    const MCInstrDesc &MCID = I->getDesc();
     // Do not copy the end of the block branches.
-    if (IgnoreBr && TID.isBranch())
+    if (IgnoreBr && MCID.isBranch())
       break;
 
     MachineInstr *MI = MF.CloneMachineInstr(I);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 19ae333115c0..5547f735ba5e 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -180,11 +180,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
 /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
 /// otherwise return 0.
 static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
-  if (!MI->isCopy())
-    return 0;
-  if (MI->getOperand(0).getSubReg() != 0)
-    return 0;
-  if (MI->getOperand(1).getSubReg() != 0)
+  if (!MI->isFullCopy())
     return 0;
   if (MI->getOperand(0).getReg() == Reg)
       return MI->getOperand(1).getReg();
@@ -307,7 +303,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
   // Best spill candidate seen so far. This must dominate UseVNI.
   SibValueInfo SVI(UseReg, UseVNI);
   MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
-  unsigned SpillDepth = Loops.getLoopDepth(UseMBB);
+  MachineBasicBlock *SpillMBB = UseMBB;
+  unsigned SpillDepth = Loops.getLoopDepth(SpillMBB);
   bool SeenOrigPHI = false; // Original PHI met.
 
   do {
@@ -320,7 +317,30 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
     // Is this value a better spill candidate?
     if (!isRegToSpill(Reg)) {
       MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
-      if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
+      if (MBB == SpillMBB) {
+        // This is an alternative def earlier in the same MBB.
+        // Hoist the spill as far as possible in SpillMBB. This can ease
+        // register pressure:
+        //
+        //   x = def
+        //   y = use x
+        //   s = copy x
+        //
+        // Hoisting the spill of s to immediately after the def removes the
+        // interference between x and y:
+        //
+        //   x = def
+        //   spill x
+        //   y = use x<kill>
+        //
+        if (VNI->def < SVI.SpillVNI->def) {
+          DEBUG(dbgs() << "  hoist in BB#" << MBB->getNumber() << ": "
+                       << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+                       << '\n');
+          SVI.SpillReg = Reg;
+          SVI.SpillVNI = VNI;
+        }
+      } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
         // This is a valid spill location dominating UseVNI.
         // Prefer to spill at a smaller loop depth.
         unsigned Depth = Loops.getLoopDepth(MBB);
@@ -329,6 +349,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
                        << ':' << VNI->id << '@' << VNI->def << '\n');
           SVI.SpillReg = Reg;
           SVI.SpillVNI = VNI;
+          SpillMBB = MBB;
           SpillDepth = Depth;
         }
       }
@@ -429,6 +450,7 @@ void InlineSpiller::analyzeSiblingValues() {
       // Check possible sibling copies.
       if (VNI->isPHIDef() || VNI->getCopy()) {
         VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+        assert(OrigVNI && "Def outside original live range");
         if (OrigVNI->def != VNI->def)
           DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
       }
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index b1014a97fa03..a09bb39f8336 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "regalloc"
 #include "InterferenceCache.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
@@ -40,9 +41,18 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
   E = RoundRobin;
   if (++RoundRobin == CacheEntries)
     RoundRobin = 0;
-  Entries[E].reset(PhysReg, LIUArray, TRI, MF);
-  PhysRegEntries[PhysReg] = E;
-  return &Entries[E];
+  for (unsigned i = 0; i != CacheEntries; ++i) {
+    // Skip entries that are in use.
+    if (Entries[E].hasRefs()) {
+      if (++E == CacheEntries)
+        E = 0;
+      continue;
+    }
+    Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+    PhysRegEntries[PhysReg] = E;
+    return &Entries[E];
+  }
+  llvm_unreachable("Ran out of interference cache entries.");
 }
 
 /// revalidate - LIU contents have changed, update tags.
@@ -59,6 +69,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
                                      LiveIntervalUnion *LIUArray,
                                      const TargetRegisterInfo *TRI,
                                      const MachineFunction *MF) {
+  assert(!hasRefs() && "Cannot reset cache entry with references");
   // LIU's changed, invalidate cache.
   ++Tag;
   PhysReg = physReg;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 6c36fa4021fb..7f0a27a41baa 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -43,6 +43,9 @@ class InterferenceCache {
     /// change.
     unsigned Tag;
 
+    /// RefCount - The total number of Cursor instances referring to this Entry.
+    unsigned RefCount;
+
     /// MF - The current function.
     MachineFunction *MF;
 
@@ -68,9 +71,10 @@ class InterferenceCache {
     void update(unsigned MBBNum);
 
   public:
-    Entry() : PhysReg(0), Tag(0), Indexes(0) {}
+    Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
 
     void clear(MachineFunction *mf, SlotIndexes *indexes) {
+      assert(!hasRefs() && "Cannot clear cache entry with references");
       PhysReg = 0;
       MF = mf;
       Indexes = indexes;
@@ -78,6 +82,10 @@ class InterferenceCache {
 
     unsigned getPhysReg() const { return PhysReg; }
 
+    void addRef(int Delta) { RefCount += Delta; }
+
+    bool hasRefs() const { return RefCount > 0; }
+
     void revalidate();
 
     /// valid - Return true if this is a valid entry for physReg.
@@ -122,15 +130,48 @@ public:
   void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
             const TargetRegisterInfo *);
 
+  /// getMaxCursors - Return the maximum number of concurrent cursors that can
+  /// be supported.
+  unsigned getMaxCursors() const { return CacheEntries; }
+
   /// Cursor - The primary query interface for the block interference cache.
   class Cursor {
     Entry *CacheEntry;
     BlockInterference *Current;
+
+    void setEntry(Entry *E) {
+      Current = 0;
+      // Update reference counts. Nothing happens when RefCount reaches 0, so
+      // we don't have to check for E == CacheEntry etc.
+      if (CacheEntry)
+        CacheEntry->addRef(-1);
+      CacheEntry = E;
+      if (CacheEntry)
+        CacheEntry->addRef(+1);
+    }
+
   public:
-    /// Cursor - Create a cursor for the interference allocated to PhysReg and
-    /// all its aliases.
-    Cursor(InterferenceCache &Cache, unsigned PhysReg)
-      : CacheEntry(Cache.get(PhysReg)), Current(0) {}
+    /// Cursor - Create a dangling cursor.
+    Cursor() : CacheEntry(0), Current(0) {}
+    ~Cursor() { setEntry(0); }
+
+    Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+      setEntry(O.CacheEntry);
+    }
+
+    Cursor &operator=(const Cursor &O) {
+      setEntry(O.CacheEntry);
+      return *this;
+    }
+
+    /// setPhysReg - Point this cursor to PhysReg's interference.
+    void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+      // Release reference before getting a new one. That guarantees we can
+      // actually have CacheEntries live cursors.
+      setEntry(0);
+      if (PhysReg)
+        setEntry(Cache.get(PhysReg));
+    }
 
     /// moveTo - Move cursor to basic block MBBNum.
     void moveToBlock(unsigned MBBNum) {
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3861ddadf655..611886ff16a1 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -29,7 +29,7 @@ static void EnsureFunctionExists(Module &M, const char *Name,
                                  ArgIt ArgBegin, ArgIt ArgEnd,
                                  const Type *RetTy) {
   // Insert a correctly-typed definition now.
-  std::vector<const Type *> ParamTys;
+  std::vector<Type *> ParamTys;
   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
     ParamTys.push_back(I->getType());
   M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
@@ -69,7 +69,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
   // program already contains a function with this name.
   Module *M = CI->getParent()->getParent()->getParent();
   // Get or insert the definition now.
-  std::vector<const Type *> ParamTys;
+  std::vector<Type *> ParamTys;
   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
     ParamTys.push_back((*I)->getType());
   Constant* FCache = M->getOrInsertFunction(NewFn,
@@ -77,7 +77,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
 
   IRBuilder<> Builder(CI->getParent(), CI);
   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
-  CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+  CallInst *NewCI = Builder.CreateCall(FCache, Args);
   NewCI->setName(CI->getName());
   if (!CI->use_empty())
     CI->replaceAllUsesWith(NewCI);
@@ -353,6 +353,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     report_fatal_error("Code generator does not support intrinsic function '"+
                       Callee->getName()+"'!");
 
+  case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with exp.
+    Value *V = CI->getArgOperand(0);
+    CI->replaceAllUsesWith(V);
+    break;
+  }
+
     // The setjmp/longjmp intrinsics should only exist in the code if it was
     // never optimized (ie, right out of the CFE), or if it has been hacked on
     // by the lowerinvoke pass.  In both cases, the right thing to do is to
@@ -546,14 +553,13 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
       !CI->getType()->isIntegerTy())
     return false;
 
-  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
   if (!Ty)
     return false;
 
   // Okay, we can do this xform, do so now.
-  const Type *Tys[] = { Ty };
   Module *M = CI->getParent()->getParent()->getParent();
-  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
 
   Value *Op = CI->getArgOperand(0);
   Op = CallInst::Create(Int, Op, CI->getName(), CI);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 589d0a9a671d..f985af8ba83e 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -24,10 +24,14 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
@@ -98,10 +102,10 @@ static cl::opt<cl::boolOrDefault>
 EnableFastISelOption("fast-isel", cl::Hidden,
   cl::desc("Enable the \"fast\" instruction selector"));
 
-LLVMTargetMachine::LLVMTargetMachine(const Target &T,
-                                     const std::string &Triple)
-  : TargetMachine(T), TargetTriple(Triple) {
-  AsmInfo = T.createAsmInfo(TargetTriple);
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+                                     StringRef CPU, StringRef FS)
+  : TargetMachine(T, Triple, CPU, FS) {
+  AsmInfo = T.createMCAsmInfo(Triple);
 }
 
 // Set the default code model for the JIT for a generic target.
@@ -136,14 +140,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   default: return true;
   case CGFT_AssemblyFile: {
     MCInstPrinter *InstPrinter =
-      getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI);
+      getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
 
     // Create a code emitter if asked to show the encoding.
     MCCodeEmitter *MCE = 0;
     TargetAsmBackend *TAB = 0;
     if (ShowMCEncoding) {
-      MCE = getTarget().createCodeEmitter(*this, *Context);
-      TAB = getTarget().createAsmBackend(TargetTriple);
+      const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+      MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context);
+      TAB = getTarget().createAsmBackend(getTargetTriple());
     }
 
     MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
@@ -159,13 +164,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   case CGFT_ObjectFile: {
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
-    MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
-    TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+    const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+    MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
+                                                       *Context);
+    TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
     if (MCE == 0 || TAB == 0)
       return true;
 
-    AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
-                                                       *TAB, Out, MCE,
+    AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(),
+                                                       *Context, *TAB, Out, MCE,
                                                        hasMCRelaxAll(),
                                                        hasMCNoExecStack()));
     AsmStreamer.get()->InitSections();
@@ -240,13 +247,14 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
 
   // Create the code emitter for the target if it exists.  If not, .o file
   // emission fails.
-  MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx);
-  TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+  const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+  MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx);
+  TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
   if (MCE == 0 || TAB == 0)
     return true;
 
   OwningPtr<MCStreamer> AsmStreamer;
-  AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx,
+  AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx,
                                                      *TAB, Out, MCE,
                                                      hasMCRelaxAll(),
                                                      hasMCNoExecStack()));
@@ -384,6 +392,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   // Expand pseudo-instructions emitted by ISel.
   PM.add(createExpandISelPseudosPass());
 
+  // Pre-ra tail duplication.
+  if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+    PM.add(createTailDuplicatePass(true));
+    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+  }
+
   // Optimize PHIs before DCE: removing dead PHI cycles may make more
   // instructions dead.
   if (OptLevel != CodeGenOpt::None)
@@ -412,12 +426,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     printAndVerify(PM, "After codegen peephole optimization pass");
   }
 
-  // Pre-ra tail duplication.
-  if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
-    PM.add(createTailDuplicatePass(true));
-    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
-  }
-
   // Run pre-ra passes.
   if (addPreRegAlloc(PM, OptLevel))
     printAndVerify(PM, "After PreRegAlloc passes");
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 292928f8e787..5d38c83b49c2 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -123,7 +123,7 @@ public:
   /// getNext - Return the next UserValue in the equivalence class.
   UserValue *getNext() const { return next; }
 
-  /// match - Does this UserValue match the aprameters?
+  /// match - Does this UserValue match the parameters?
   bool match(const MDNode *Var, unsigned Offset) const {
     return Var == variable && Offset == offset;
   }
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index b67f96667bfd..70003e7cc86a 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -244,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
 //
 // For comments on how to speed it up, see Query::findIntersection().
 unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
   InterferenceResult IR = firstInterference();
   LiveInterval::iterator VirtRegEnd = VirtReg->end();
   LiveInterval *RecentInterferingVReg = NULL;
@@ -287,10 +287,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
       RecentInterferingVReg = IR.LiveUnionI.value();
       ++IR.LiveUnionI;
 
-      // Stop collecting when the max weight is exceeded.
-      if (RecentInterferingVReg->weight >= MaxWeight)
-        return InterferingVRegs.size();
-
       continue;
     }
     // VirtRegI may have advanced far beyond LiveUnionI,
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index c83578e99c6c..5e78d5e85029 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -229,8 +229,7 @@ public:
 
     // Count the virtual registers in this union that interfere with this
     // query's live virtual register, up to maxInterferingRegs.
-    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX,
-                                     float MaxWeight = HUGE_VALF);
+    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
 
     // Was this virtual register visited during collectInterferingVRegs?
     bool isSeenInterference(LiveInterval *VReg) const;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 052abad57e16..b385fb36bbf1 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -298,10 +298,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
     if (NumComp <= 1)
       continue;
     ++NumFracRanges;
+    bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
     DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
     SmallVector<LiveInterval*, 8> Dups(1, LI);
     for (unsigned i = 1; i != NumComp; ++i) {
       Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+      // If LI is an original interval that hasn't been split yet, make the new
+      // intervals their own originals instead of referring to LI. The original
+      // interval must contain all the split products, and LI doesn't.
+      if (IsOriginal)
+        VRM.setIsSplitFromReg(Dups.back()->reg, 0);
       if (delegate_)
         delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
     }
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 68946a2c9d13..8f0fb46879ac 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -22,7 +22,6 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrDesc.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Assembly/Writer.h"
@@ -61,7 +60,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
   return OS;
 }
 
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the 
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
 /// parent pointer of the MBB, the MBB numbering, and any instructions in the
 /// MBB to be on the right operand list for registers.
 ///
@@ -93,7 +92,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
 void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
   assert(N->getParent() == 0 && "machine instruction already in a basic block");
   N->setParent(Parent);
-  
+
   // Add the instruction's register operands to their corresponding
   // use/def lists.
   MachineFunction *MF = Parent->getParent();
@@ -110,7 +109,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
 
   // Remove from the use/def lists.
   N->RemoveRegOperandsFromUseLists();
-  
+
   N->setParent(0);
 
   LeakDetector::addGarbageObject(N);
@@ -339,25 +338,64 @@ void MachineBasicBlock::updateTerminator() {
   }
 }
 
-void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
-  Successors.push_back(succ);
-  succ->addPredecessor(this);
-}
+/// addSuccessor - Append succ to the successor list with the given weight.
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+  // The Weights list stays empty until the first non-zero weight is seen; at
+  // that point back-fill a 0 for every successor that is already present.
+  if (weight != 0 && Weights.empty())
+    Weights.resize(Successors.size());
+  // Once the Weights list is in use, keep it parallel to Successors.
+  if (weight != 0 || !Weights.empty())
+    Weights.push_back(weight);
+
+  Successors.push_back(succ);
+  succ->addPredecessor(this);
+}
 
 void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
   succ->removePredecessor(this);
   succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
   assert(I != Successors.end() && "Not a current successor!");
+
+  // If Weight list is empty it means we don't use it (disabled optimization).
+  if (!Weights.empty()) {
+    weight_iterator WI = getWeightIterator(I);
+    Weights.erase(WI);
+  }
+
   Successors.erase(I);
 }
 
-MachineBasicBlock::succ_iterator 
+MachineBasicBlock::succ_iterator
 MachineBasicBlock::removeSuccessor(succ_iterator I) {
   assert(I != Successors.end() && "Not a current successor!");
+
+  // If Weight list is empty it means we don't use it (disabled optimization).
+  if (!Weights.empty()) {
+    weight_iterator WI = getWeightIterator(I);
+    Weights.erase(WI);
+  }
+
   (*I)->removePredecessor(this);
   return Successors.erase(I);
 }
 
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+                                         MachineBasicBlock *New) {
+  uint32_t weight = 0;
+  succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+
+  // Weights is empty when edge weights are unused; New then gets weight 0.
+  if (!Weights.empty()) {
+    weight_iterator WI = getWeightIterator(SI);
+    weight = *WI;
+  }
+
+  // Remove Old, then append New (at the end of the list) with Old's weight.
+  removeSuccessor(SI);
+  addSuccessor(New, weight);
+}
+
 void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
   Predecessors.push_back(pred);
 }
@@ -371,10 +409,17 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
 void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
   if (this == fromMBB)
     return;
-  
+
   while (!fromMBB->succ_empty()) {
     MachineBasicBlock *Succ = *fromMBB->succ_begin();
-    addSuccessor(Succ);
+    uint32_t weight = 0;
+
+
+    // If Weight list is empty it means we don't use it (disabled optimization).
+    if (!fromMBB->Weights.empty())
+      weight = *fromMBB->Weights.begin();
+
+    addSuccessor(Succ, weight);
     fromMBB->removeSuccessor(Succ);
   }
 }
@@ -383,7 +428,7 @@ void
 MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
   if (this == fromMBB)
     return;
-  
+
   while (!fromMBB->succ_empty()) {
     MachineBasicBlock *Succ = *fromMBB->succ_begin();
     addSuccessor(Succ);
@@ -637,15 +682,14 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
   }
 
   // Update the successor information.
-  removeSuccessor(Old);
-  addSuccessor(New);
+  replaceSuccessor(Old, New);
 }
 
 /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
 /// CFG to be inserted.  If we have proven that MBB can only branch to DestA and
 /// DestB, remove any other MBB successors from the CFG.  DestA and DestB can be
 /// null.
-/// 
+///
 /// Besides DestA and DestB, retain other edges leading to LandingPads
 /// (currently there can be only one; we don't check or require that here).
 /// Note it is possible that DestA and/or DestB are LandingPads.
@@ -720,6 +764,26 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
   return DL;
 }
 
+/// getSuccWeight - Return the weight of the edge from this block to succ.
+/// Returns 0 when the Weights list is empty, i.e. weights are not in use.
+uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+  if (Weights.empty())
+    return 0;
+
+  succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+  return *getWeightIterator(I);
+}
+
+/// getWeightIterator - Return the weight iterator corresponding to the
+/// successor iterator I.
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+  assert(Weights.size() == Successors.size() && "Async weight list!");
+  size_t index = std::distance(Successors.begin(), I);
+  assert(index < Weights.size() && "Not a current successor!");
+  return Weights.begin() + index;
+}
+
 void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
                           bool t) {
   OS << "BB#" << MBB->getNumber();
diff --git a/lib/CodeGen/MachineBlockFrequency.cpp b/lib/CodeGen/MachineBlockFrequency.cpp
new file mode 100644
index 000000000000..893a320a6a63
--- /dev/null
+++ b/lib/CodeGen/MachineBlockFrequency.cpp
@@ -0,0 +1,59 @@
+//===- MachineBlockFrequency.cpp - Machine Block Frequency Analysis -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequency.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq",
+                      "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq",
+                    "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequency::ID = 0;
+
+
+MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) {
+  initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry());
+  MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+                                MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequency::~MachineBlockFrequency() {
+  delete MBFI;
+}
+
+void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.setPreservesAll();
+}
+
+bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) {
+  MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+  MBFI->doFunction(&F, &MBPI);
+  return false;
+}
+
+/// getBlockFreq - Return block frequency. Never return 0, value must be
+/// positive. Please note that initial frequency is equal to 1024. It means that
+/// we should not rely on the value itself, but only on the comparison to the
+/// other block frequencies. We do this to avoid using floating point.
+///
+uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) {
+  return MBFI->getBlockFreq(MBB);
+}
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..c13fa6bc5333
--- /dev/null
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,113 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+                      "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+                    "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(MachineBasicBlock *MBB) const {
+  uint32_t Sum = 0;
+
+  for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I) {
+    MachineBasicBlock *Succ = *I;
+    uint32_t Weight = getEdgeWeight(MBB, Succ);
+    uint32_t PrevSum = Sum;
+
+    Sum += Weight;
+    assert(Sum > PrevSum); (void) PrevSum;
+  }
+
+  return Sum;
+}
+
+uint32_t
+MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
+                                            MachineBasicBlock *Dst) const {
+  uint32_t Weight = Src->getSuccWeight(Dst);
+  if (!Weight)
+    return DEFAULT_WEIGHT;
+  return Weight;
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+                                             MachineBasicBlock *Dst) const {
+  // Hot means probability above 4/5 = 80% (strict '>'; getHotSucc uses '>=').
+  uint32_t Weight = getEdgeWeight(Src, Dst);
+  uint32_t Sum = getSumForBlock(Src);
+
+  // FIXME: Implement BranchProbability::compare then change this code to
+  // compare this BranchProbability against a static "hot" BranchProbability.
+  return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+  uint32_t Sum = 0;
+  uint32_t MaxWeight = 0;
+  MachineBasicBlock *MaxSucc = 0;
+
+  for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I) {
+    MachineBasicBlock *Succ = *I;
+    uint32_t Weight = getEdgeWeight(MBB, Succ);
+    uint32_t PrevSum = Sum;
+
+    Sum += Weight;
+    assert(Sum > PrevSum); (void) PrevSum;
+
+    if (Weight > MaxWeight) {
+      MaxWeight = Weight;
+      MaxSucc = Succ;
+    }
+  }
+
+  // FIXME: Use BranchProbability::compare.
+  if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+    return MaxSucc;
+
+  return 0;
+}
+
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+                                                 MachineBasicBlock *Dst) const {
+  uint32_t N = getEdgeWeight(Src, Dst);
+  uint32_t D = getSumForBlock(Src);
+
+  return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+                     MachineBasicBlock *Dst) const {
+
+  const BranchProbability Prob = getEdgeProbability(Src, Dst);
+  OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+     << " probability is "  << Prob 
+     << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+  return OS;
+}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index f97ccf65790f..3a60a37af443 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -260,12 +260,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
     return false;
 
   // Ignore stuff that we obviously can't move.
-  const TargetInstrDesc &TID = MI->getDesc();  
-  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+  const MCInstrDesc &MCID = MI->getDesc();  
+  if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
       MI->hasUnmodeledSideEffects())
     return false;
 
-  if (TID.mayLoad()) {
+  if (MCID.mayLoad()) {
     // Okay, this instruction does a load. As a refinement, we allow the target
     // to decide whether the loaded value is actually a constant. If so, we can
     // actually use it as a load.
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 50750a50ab89..cd2515652831 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -152,10 +152,10 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
 /// of `new MachineInstr'.
 ///
 MachineInstr *
-MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
                                     DebugLoc DL, bool NoImp) {
   return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
-    MachineInstr(TID, DL, NoImp);
+    MachineInstr(MCID, DL, NoImp);
 }
 
 /// CloneMachineInstr - Create a new MachineInstr which is a copy of the
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 36b0b8330a86..143a29b08a1e 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,19 +15,22 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
+#include "llvm/Module.h"
 #include "llvm/Type.h"
 #include "llvm/Value.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrDesc.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
@@ -194,6 +197,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
            getSubReg() == Other.getSubReg();
   case MachineOperand::MO_Immediate:
     return getImm() == Other.getImm();
+  case MachineOperand::MO_CImmediate:
+    return getCImm() == Other.getCImm();
   case MachineOperand::MO_FPImmediate:
     return getFPImm() == Other.getFPImm();
   case MachineOperand::MO_MachineBasicBlock:
@@ -267,6 +272,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
   case MachineOperand::MO_Immediate:
     OS << getImm();
     break;
+  case MachineOperand::MO_CImmediate:
+    getCImm()->getValue().print(OS, false);
+    break;
   case MachineOperand::MO_FPImmediate:
     if (getFPImm()->getType()->isFloatTy())
       OS << getFPImm()->getValueAPF().convertToFloat();
@@ -454,9 +462,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
 //===----------------------------------------------------------------------===//
 
 /// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// TID NULL and no operands.
+/// MCID NULL and no operands.
 MachineInstr::MachineInstr()
-  : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+  : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0),
     Parent(0) {
   // Make sure that we get added to a machine basicblock
@@ -464,23 +472,23 @@ MachineInstr::MachineInstr()
 }
 
 void MachineInstr::addImplicitDefUseOperands() {
-  if (TID->ImplicitDefs)
-    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+  if (MCID->ImplicitDefs)
+    for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
       addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
-  if (TID->ImplicitUses)
-    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+  if (MCID->ImplicitUses)
+    for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
       addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
 }
 
 /// MachineInstr ctor - This constructor creates a MachineInstr and adds the
 /// implicit operands. It reserves space for the number of operands specified by
-/// the TargetInstrDesc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   if (!NoImp)
-    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+    NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   if (!NoImp)
     addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
@@ -488,13 +496,13 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
 }
 
 /// MachineInstr ctor - As above, but with a DebugLoc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
                            bool NoImp)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
   if (!NoImp)
-    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+    NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   if (!NoImp)
     addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
@@ -504,12 +512,12 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
 /// MachineInstr ctor - Work exactly the same as the ctor two above, except
 /// that the MachineInstr is created and added to the end of the specified 
 /// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
-  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
@@ -519,12 +527,12 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
 /// MachineInstr ctor - As above, but with a DebugLoc.
 ///
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
-                           const TargetInstrDesc &tid)
-  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+                           const MCInstrDesc &tid)
+  : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
-  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
   addImplicitDefUseOperands();
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
@@ -534,7 +542,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
 /// MachineInstr ctor - Copies MachineInstr arg exactly
 ///
 MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
-  : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+  : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
     MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
     Parent(0), debugLoc(MI.getDebugLoc()) {
   Operands.reserve(MI.getNumOperands());
@@ -621,7 +629,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
         Operands.back().AddRegOperandToRegInfo(RegInfo);
         // If the register operand is flagged as early, mark the operand as such
         unsigned OpNo = Operands.size() - 1;
-        if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+        if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
           Operands[OpNo].setIsEarlyClobber(true);
       }
       return;
@@ -643,7 +651,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
     if (Operands[OpNo].isReg()) {
       Operands[OpNo].AddRegOperandToRegInfo(0);
       // If the register operand is flagged as early, mark the operand as such
-      if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+      if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
         Operands[OpNo].setIsEarlyClobber(true);
     }
 
@@ -668,7 +676,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
     if (Operands[OpNo].isReg()) {
       Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
       // If the register operand is flagged as early, mark the operand as such
-      if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+      if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
         Operands[OpNo].setIsEarlyClobber(true);
     }
     
@@ -691,7 +699,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
 
       // If the register operand is flagged as early, mark the operand as such
     if (Operands[OpNo].isReg()
-        && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+        && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
       Operands[OpNo].setIsEarlyClobber(true);
   }
 }
@@ -794,6 +802,11 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
         return false;
     }
   }
+  // If DebugLoc does not match then two dbg.values are not identical.
+  if (isDebugValue())
+    if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+        && getDebugLoc() != Other->getDebugLoc())
+      return false;
   return true;
 }
 
@@ -817,8 +830,8 @@ void MachineInstr::eraseFromParent() {
 /// OperandComplete - Return true if it's illegal to add a new operand
 ///
 bool MachineInstr::OperandsComplete() const {
-  unsigned short NumOperands = TID->getNumOperands();
-  if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+  unsigned short NumOperands = MCID->getNumOperands();
+  if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
     return true;  // Broken: we have all the operands of this instruction!
   return false;
 }
@@ -826,8 +839,8 @@ bool MachineInstr::OperandsComplete() const {
 /// getNumExplicitOperands - Returns the number of non-implicit operands.
 ///
 unsigned MachineInstr::getNumExplicitOperands() const {
-  unsigned NumOperands = TID->getNumOperands();
-  if (!TID->isVariadic())
+  unsigned NumOperands = MCID->getNumOperands();
+  if (!MCID->isVariadic())
     return NumOperands;
 
   for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
@@ -928,10 +941,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
 /// operand list that is used to represent the predicate. It returns -1 if
 /// none is found.
 int MachineInstr::findFirstPredOperandIdx() const {
-  const TargetInstrDesc &TID = getDesc();
-  if (TID.isPredicable()) {
+  const MCInstrDesc &MCID = getDesc();
+  if (MCID.isPredicable()) {
     for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-      if (TID.OpInfo[i].isPredicate())
+      if (MCID.OpInfo[i].isPredicate())
         return i;
   }
 
@@ -987,11 +1000,11 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
   }
 
   assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
-  const TargetInstrDesc &TID = getDesc();
-  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+  const MCInstrDesc &MCID = getDesc();
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
     if (MO.isReg() && MO.isUse() &&
-        TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+        MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
       if (UseOpIdx)
         *UseOpIdx = (unsigned)i;
       return true;
@@ -1047,13 +1060,13 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
     return false;
   }
 
-  const TargetInstrDesc &TID = getDesc();
-  if (UseOpIdx >= TID.getNumOperands())
+  const MCInstrDesc &MCID = getDesc();
+  if (UseOpIdx >= MCID.getNumOperands())
     return false;
   const MachineOperand &MO = getOperand(UseOpIdx);
   if (!MO.isReg() || !MO.isUse())
     return false;
-  int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+  int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
   if (DefIdx == -1)
     return false;
   if (DefOpIdx)
@@ -1093,11 +1106,11 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
 
 /// copyPredicates - Copies predicate operand(s) from MI.
 void MachineInstr::copyPredicates(const MachineInstr *MI) {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isPredicable())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isPredicable())
     return;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    if (TID.OpInfo[i].isPredicate()) {
+    if (MCID.OpInfo[i].isPredicate()) {
       // Predicated operands must be last operands.
       addOperand(MI->getOperand(i));
     }
@@ -1134,13 +1147,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
                                 AliasAnalysis *AA,
                                 bool &SawStore) const {
   // Ignore stuff that we obviously can't move.
-  if (TID->mayStore() || TID->isCall()) {
+  if (MCID->mayStore() || MCID->isCall()) {
     SawStore = true;
     return false;
   }
 
   if (isLabel() || isDebugValue() ||
-      TID->isTerminator() || hasUnmodeledSideEffects())
+      MCID->isTerminator() || hasUnmodeledSideEffects())
     return false;
 
   // See if this instruction does a load.  If so, we have to guarantee that the
@@ -1148,7 +1161,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
   // destination. The check for isInvariantLoad gives the targe the chance to
   // classify the load as always returning a constant, e.g. a constant pool
   // load.
-  if (TID->mayLoad() && !isInvariantLoad(AA))
+  if (MCID->mayLoad() && !isInvariantLoad(AA))
     // Otherwise, this is a real load.  If there is a store between the load and
     // end of block, or if the load is volatile, we can't move it.
     return !SawStore && !hasVolatileMemoryRef();
@@ -1188,9 +1201,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
 /// have no volatile memory references.
 bool MachineInstr::hasVolatileMemoryRef() const {
   // An instruction known never to access memory won't have a volatile access.
-  if (!TID->mayStore() &&
-      !TID->mayLoad() &&
-      !TID->isCall() &&
+  if (!MCID->mayStore() &&
+      !MCID->mayLoad() &&
+      !MCID->isCall() &&
       !hasUnmodeledSideEffects())
     return false;
 
@@ -1214,7 +1227,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
 /// *all* loads the instruction does are invariant (if it does multiple loads).
 bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
   // If the instruction doesn't load at all, it isn't an invariant load.
-  if (!TID->mayLoad())
+  if (!MCID->mayLoad())
     return false;
 
   // If the instruction has lost its memoperands, conservatively assume that
@@ -1364,6 +1377,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
   // Print the rest of the operands.
   bool OmittedAnyCallClobbers = false;
   bool FirstOp = true;
+  unsigned AsmDescOp = ~0u;
+  unsigned AsmOpCount = 0;
 
   if (isInlineAsm()) {
     // Print asm string.
@@ -1377,7 +1392,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
       OS << " [alignstack]";
 
-    StartOp = InlineAsm::MIOp_FirstOperand;
+    StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
     FirstOp = false;
   }
 
@@ -1416,10 +1431,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     if (FirstOp) FirstOp = false; else OS << ",";
     OS << " ";
     if (i < getDesc().NumOperands) {
-      const TargetOperandInfo &TOI = getDesc().OpInfo[i];
-      if (TOI.isPredicate())
+      const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+      if (MCOI.isPredicate())
         OS << "pred:";
-      if (TOI.isOptionalDef())
+      if (MCOI.isOptionalDef())
         OS << "opt:";
     }
     if (isDebugValue() && MO.isMetadata()) {
@@ -1431,6 +1446,26 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
         MO.print(OS, TM);
     } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
       OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+    } else if (i == AsmDescOp && MO.isImm()) {
+      // Pretty print the inline asm operand descriptor.
+      OS << '$' << AsmOpCount++;
+      unsigned Flag = MO.getImm();
+      switch (InlineAsm::getKind(Flag)) {
+      case InlineAsm::Kind_RegUse:             OS << ":[reguse]"; break;
+      case InlineAsm::Kind_RegDef:             OS << ":[regdef]"; break;
+      case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break;
+      case InlineAsm::Kind_Clobber:            OS << ":[clobber]"; break;
+      case InlineAsm::Kind_Imm:                OS << ":[imm]"; break;
+      case InlineAsm::Kind_Mem:                OS << ":[mem]"; break;
+      default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break;
+      }
+
+      unsigned TiedTo = 0;
+      if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+        OS << " [tiedto:$" << TiedTo << ']';
+
+      // Compute the index of the next operand descriptor.
+      AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
     } else
       MO.print(OS, TM);
   }
@@ -1685,3 +1720,24 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
   }
   return Hash;
 }
+
+void MachineInstr::emitError(StringRef Msg) const {
+  // Find the source location cookie.
+  unsigned LocCookie = 0;
+  const MDNode *LocMD = 0;
+  for (unsigned i = getNumOperands(); i != 0; --i) {
+    if (getOperand(i-1).isMetadata() &&
+        (LocMD = getOperand(i-1).getMetadata()) &&
+        LocMD->getNumOperands() != 0) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+        LocCookie = CI->getZExtValue();
+        break;
+      }
+    }
+  }
+
+  if (const MachineBasicBlock *MBB = getParent())
+    if (const MachineFunction *MF = MBB->getParent())
+      return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+  report_fatal_error(Msg);
+}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index b315702eef8f..722ceb202439 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -28,10 +28,10 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/DenseMap.h"
@@ -1018,9 +1018,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
                                     /*UnfoldStore=*/false,
                                     &LoadRegIndex);
   if (NewOpc == 0) return 0;
-  const TargetInstrDesc &TID = TII->get(NewOpc);
-  if (TID.getNumDefs() != 1) return 0;
-  const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+  const MCInstrDesc &MID = TII->get(NewOpc);
+  if (MID.getNumDefs() != 1) return 0;
+  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
   // Ok, we're unfolding. Create a temporary register and do the unfold.
   unsigned Reg = MRI->createVirtualRegister(RC);
 
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 08ff5bb71521..4b3e64c25f60 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -20,7 +20,6 @@ using namespace llvm;
 MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
-  RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
   UsedPhysRegs.resize(TRI.getNumRegs());
   
   // Create the physreg use/def lists.
@@ -38,25 +37,13 @@ MachineRegisterInfo::~MachineRegisterInfo() {
            "PhysRegUseDefLists has entries after all instructions are deleted");
 #endif
   delete [] PhysRegUseDefLists;
-  delete [] RegClass2VRegMap;
 }
 
 /// setRegClass - Set the register class of the specified virtual register.
 ///
 void
 MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
-  const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
   VRegInfo[Reg].first = RC;
-
-  // Remove from old register class's vregs list. This may be slow but
-  // fortunately this operation is rarely needed.
-  std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
-  std::vector<unsigned>::iterator I =
-    std::find(VRegs.begin(), VRegs.end(), Reg);
-  VRegs.erase(I);
-
-  // Add to new register class's vregs list.
-  RegClass2VRegMap[RC->getID()].push_back(Reg);
 }
 
 const TargetRegisterClass *
@@ -95,7 +82,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
   if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
     // The vector reallocated, handle this now.
     HandleVRegListReallocation();
-  RegClass2VRegMap[RegClass->getID()].push_back(Reg);
   return Reg;
 }
 
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 471463b46f5b..7a55852a1315 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -62,6 +62,7 @@ namespace {
     raw_ostream *OS;
     const MachineFunction *MF;
     const TargetMachine *TM;
+    const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
     const MachineRegisterInfo *MRI;
 
@@ -255,6 +256,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
 
   this->MF = &MF;
   TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
   TRI = TM->getRegisterInfo();
   MRI = &MF.getRegInfo();
 
@@ -387,8 +389,6 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
 
 void
 MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
-  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
   // Count the number of landing pad successors.
   SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
   for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
@@ -541,19 +541,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
 }
 
 void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
-  const TargetInstrDesc &TI = MI->getDesc();
-  if (MI->getNumOperands() < TI.getNumOperands()) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MI->getNumOperands() < MCID.getNumOperands()) {
     report("Too few operands", MI);
-    *OS << TI.getNumOperands() << " operands expected, but "
+    *OS << MCID.getNumOperands() << " operands expected, but "
         << MI->getNumExplicitOperands() << " given.\n";
   }
 
   // Check the MachineMemOperands for basic consistency.
   for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
        E = MI->memoperands_end(); I != E; ++I) {
-    if ((*I)->isLoad() && !TI.mayLoad())
+    if ((*I)->isLoad() && !MCID.mayLoad())
       report("Missing mayLoad flag", MI);
-    if ((*I)->isStore() && !TI.mayStore())
+    if ((*I)->isStore() && !MCID.mayStore())
       report("Missing mayStore flag", MI);
   }
 
@@ -575,29 +575,30 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
 void
 MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
   const MachineInstr *MI = MO->getParent();
-  const TargetInstrDesc &TI = MI->getDesc();
-  const TargetOperandInfo &TOI = TI.OpInfo[MONum];
+  const MCInstrDesc &MCID = MI->getDesc();
+  const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
 
-  // The first TI.NumDefs operands must be explicit register defines
-  if (MONum < TI.getNumDefs()) {
+  // The first MCID.NumDefs operands must be explicit register defines
+  if (MONum < MCID.getNumDefs()) {
     if (!MO->isReg())
       report("Explicit definition must be a register", MO, MONum);
     else if (!MO->isDef())
       report("Explicit definition marked as use", MO, MONum);
     else if (MO->isImplicit())
       report("Explicit definition marked as implicit", MO, MONum);
-  } else if (MONum < TI.getNumOperands()) {
+  } else if (MONum < MCID.getNumOperands()) {
     // Don't check if it's the last operand in a variadic instruction. See,
     // e.g., LDM_RET in the arm back end.
-    if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) {
-      if (MO->isDef() && !TOI.isOptionalDef())
+    if (MO->isReg() &&
+        !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+      if (MO->isDef() && !MCOI.isOptionalDef())
           report("Explicit operand marked as def", MO, MONum);
       if (MO->isImplicit())
         report("Explicit operand marked as implicit", MO, MONum);
     }
   } else {
     // ARM adds %reg0 operands to indicate predicates. We'll allow that.
-    if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg())
+    if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
       report("Extra explicit operand on non-variadic instruction", MO, MONum);
   }
 
@@ -709,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     }
 
     // Check register classes.
-    if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
+    if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
       unsigned SubIdx = MO->getSubReg();
 
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -723,7 +724,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
           }
           sr = s;
         }
-        if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+        if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
           if (!DRC->contains(sr)) {
             report("Illegal physical register for instruction", MO, MONum);
             *OS << TRI->getName(sr) << " is not a "
@@ -743,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
           }
           RC = SRC;
         }
-        if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+        if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
           if (!RC->hasSuperClassEq(DRC)) {
             report("Illegal virtual register for instruction", MO, MONum);
             *OS << "Expected a " << DRC->getName() << " register, but got a "
@@ -765,11 +766,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
         LiveInts && !LiveInts->isNotInMIMap(MI)) {
       LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
       SlotIndex Idx = LiveInts->getInstructionIndex(MI);
-      if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+      if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
         report("Instruction loads from dead spill slot", MO, MONum);
         *OS << "Live stack: " << LI << '\n';
       }
-      if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+      if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
         report("Instruction stores to dead spill slot", MO, MONum);
         *OS << "Live stack: " << LI << '\n';
       }
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index c105bb06ebe5..c523e39bc258 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -353,10 +353,10 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                         SmallSet<unsigned, 4> &ImmDefRegs,
                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isMoveImmediate())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isMoveImmediate())
     return false;
-  if (TID.getNumDefs() != 1)
+  if (MCID.getNumDefs() != 1)
     return false;
   unsigned Reg = MI->getOperand(0).getReg();
   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -429,16 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      const TargetInstrDesc &TID = MI->getDesc();
+      const MCInstrDesc &MCID = MI->getDesc();
 
-      if (TID.isBitcast()) {
+      if (MCID.isBitcast()) {
         if (OptimizeBitcastInstr(MI, MBB)) {
           // MI is deleted.
           Changed = true;
           MII = First ? I->begin() : llvm::next(PMII);
           continue;
         }        
-      } else if (TID.isCompare()) {
+      } else if (MCID.isCompare()) {
         if (OptimizeCmpInstr(MI, MBB)) {
           // MI is deleted.
           Changed = true;
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index ba8501ff7233..c73e87733cb4 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,6 +22,7 @@
 #include "AntiDepBreaker.h"
 #include "AggressiveAntiDepBreaker.h"
 #include "CriticalAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
 #include "ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
@@ -37,7 +38,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -52,7 +53,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");
 STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
 
 // Post-RA scheduling is enabled with
-// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
 // override the target.
 static cl::opt<bool>
 EnablePostRAScheduler("post-RA-scheduler",
@@ -80,6 +81,7 @@ namespace {
   class PostRAScheduler : public MachineFunctionPass {
     AliasAnalysis *AA;
     const TargetInstrInfo *TII;
+    RegisterClassInfo RegClassInfo;
     CodeGenOpt::Level OptLevel;
 
   public:
@@ -135,7 +137,8 @@ namespace {
   public:
     SchedulePostRATDList(
       MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
-      AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+      AliasAnalysis *AA, const RegisterClassInfo&,
+      TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
       SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
 
     ~SchedulePostRATDList();
@@ -179,7 +182,8 @@ namespace {
 
 SchedulePostRATDList::SchedulePostRATDList(
   MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
-  AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+  AliasAnalysis *AA, const RegisterClassInfo &RCI,
+  TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
   SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
   : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
     KillIndices(TRI->getNumRegs())
@@ -189,10 +193,10 @@ SchedulePostRATDList::SchedulePostRATDList(
   HazardRec =
     TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
   AntiDepBreak =
-    ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
-     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) :
-     ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
-      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+    ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+     ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
 }
 
 SchedulePostRATDList::~SchedulePostRATDList() {
@@ -205,9 +209,11 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
   AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+  RegClassInfo.runOnMachineFunction(Fn);
 
   // Check for explicit enable/disable of post-ra scheduling.
-  TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+  TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+    TargetSubtargetInfo::ANTIDEP_NONE;
   SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
   if (EnablePostRAScheduler.getPosition() > 0) {
     if (!EnablePostRAScheduler)
@@ -215,22 +221,24 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   } else {
     // Check that post-RA scheduling is enabled for this target.
     // This may upgrade the AntiDepMode.
-    const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+    const TargetSubtargetInfo &ST =
+      Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
     if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
       return false;
   }
 
   // Check for antidep breaking override...
   if (EnableAntiDepBreaking.getPosition() > 0) {
-    AntiDepMode = (EnableAntiDepBreaking == "all") ?
-      TargetSubtarget::ANTIDEP_ALL :
-        (EnableAntiDepBreaking == "critical")
-           ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+    AntiDepMode = (EnableAntiDepBreaking == "all")
+      ? TargetSubtargetInfo::ANTIDEP_ALL
+      : ((EnableAntiDepBreaking == "critical")
+         ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+         : TargetSubtargetInfo::ANTIDEP_NONE);
   }
 
   DEBUG(dbgs() << "PostRAScheduler\n");
 
-  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
                                  CriticalPathRCs);
 
   // Loop over all of the basic blocks
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
deleted file mode 100644
index d6e31dae9d13..000000000000
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ /dev/null
@@ -1,1430 +0,0 @@
-//===-- PreAllocSplitting.cpp - Pre-allocation Interval Spltting Pass. ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the machine instruction level pre-register allocation
-// live interval splitting pass. It finds live interval barriers, i.e.
-// instructions which will kill all physical registers in certain register
-// classes, and split all live intervals which cross the barrier.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-alloc-split"
-#include "VirtRegMap.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
-                                   cl::Hidden);
-static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
-                                     cl::Hidden);
-
-STATISTIC(NumSplits, "Number of intervals split");
-STATISTIC(NumRemats, "Number of intervals split by rematerialization");
-STATISTIC(NumFolds, "Number of intervals split with spill folding");
-STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
-STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
-STATISTIC(NumDeadSpills, "Number of dead spills removed");
-
-namespace {
-  class PreAllocSplitting : public MachineFunctionPass {
-    MachineFunction       *CurrMF;
-    const TargetMachine   *TM;
-    const TargetInstrInfo *TII;
-    const TargetRegisterInfo* TRI;
-    MachineFrameInfo      *MFI;
-    MachineRegisterInfo   *MRI;
-    SlotIndexes           *SIs;
-    LiveIntervals         *LIs;
-    LiveStacks            *LSs;
-    VirtRegMap            *VRM;
-
-    // Barrier - Current barrier being processed.
-    MachineInstr          *Barrier;
-
-    // BarrierMBB - Basic block where the barrier resides in.
-    MachineBasicBlock     *BarrierMBB;
-
-    // Barrier - Current barrier index.
-    SlotIndex     BarrierIdx;
-
-    // CurrLI - Current live interval being split.
-    LiveInterval          *CurrLI;
-
-    // CurrSLI - Current stack slot live interval.
-    LiveInterval          *CurrSLI;
-
-    // CurrSValNo - Current val# for the stack slot live interval.
-    VNInfo                *CurrSValNo;
-
-    // IntervalSSMap - A map from live interval to spill slots.
-    DenseMap<unsigned, int> IntervalSSMap;
-
-    // Def2SpillMap - A map from a def instruction index to spill index.
-    DenseMap<SlotIndex, SlotIndex> Def2SpillMap;
-
-  public:
-    static char ID;
-    PreAllocSplitting() : MachineFunctionPass(ID) {
-      initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
-    }
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      AU.addRequired<SlotIndexes>();
-      AU.addPreserved<SlotIndexes>();
-      AU.addRequired<LiveIntervals>();
-      AU.addPreserved<LiveIntervals>();
-      AU.addRequired<LiveStacks>();
-      AU.addPreserved<LiveStacks>();
-      AU.addPreserved<RegisterCoalescer>();
-      AU.addPreserved<CalculateSpillWeights>();
-      AU.addPreservedID(StrongPHIEliminationID);
-      AU.addPreservedID(PHIEliminationID);
-      AU.addRequired<MachineDominatorTree>();
-      AU.addRequired<MachineLoopInfo>();
-      AU.addRequired<VirtRegMap>();
-      AU.addPreserved<MachineDominatorTree>();
-      AU.addPreserved<MachineLoopInfo>();
-      AU.addPreserved<VirtRegMap>();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-    
-    virtual void releaseMemory() {
-      IntervalSSMap.clear();
-      Def2SpillMap.clear();
-    }
-
-    virtual const char *getPassName() const {
-      return "Pre-Register Allocaton Live Interval Splitting";
-    }
-
-    /// print - Implement the dump method.
-    virtual void print(raw_ostream &O, const Module* M = 0) const {
-      LIs->print(O, M);
-    }
-
-
-  private:
-
-    MachineBasicBlock::iterator
-      findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
-                     SmallPtrSet<MachineInstr*, 4>&);
-
-    MachineBasicBlock::iterator
-      findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
-                     SmallPtrSet<MachineInstr*, 4>&);
-
-    int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
-
-    bool IsAvailableInStack(MachineBasicBlock*, unsigned,
-                            SlotIndex, SlotIndex,
-                            SlotIndex&, int&) const;
-
-    void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex);
-
-    bool SplitRegLiveInterval(LiveInterval*);
-
-    bool SplitRegLiveIntervals(const TargetRegisterClass **,
-                               SmallPtrSet<LiveInterval*, 8>&);
-    
-    bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
-                        MachineBasicBlock* BarrierMBB);
-    bool Rematerialize(unsigned vreg, VNInfo* ValNo,
-                       MachineInstr* DefMI,
-                       MachineBasicBlock::iterator RestorePt,
-                       SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
-    MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
-                            MachineInstr* DefMI,
-                            MachineInstr* Barrier,
-                            MachineBasicBlock* MBB,
-                            int& SS,
-                            SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
-    MachineInstr* FoldRestore(unsigned vreg, 
-                              const TargetRegisterClass* RC,
-                              MachineInstr* Barrier,
-                              MachineBasicBlock* MBB,
-                              int SS,
-                              SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
-    void RenumberValno(VNInfo* VN);
-    void ReconstructLiveInterval(LiveInterval* LI);
-    bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
-    unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
-                               unsigned Reg, int FrameIndex, bool& TwoAddr);
-    VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
-                                   MachineBasicBlock* MBB, LiveInterval* LI,
-                                   SmallPtrSet<MachineInstr*, 4>& Visited,
-            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
-            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
-                                      DenseMap<MachineInstr*, VNInfo*>& NewVNs,
-                                DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
-                                DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
-                                        bool IsTopLevel, bool IsIntraBlock);
-    VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
-                                   MachineBasicBlock* MBB, LiveInterval* LI,
-                                   SmallPtrSet<MachineInstr*, 4>& Visited,
-            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
-            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
-                                      DenseMap<MachineInstr*, VNInfo*>& NewVNs,
-                                DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
-                                DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
-                                        bool IsTopLevel, bool IsIntraBlock);
-};
-} // end anonymous namespace
-
-char PreAllocSplitting::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting",
-                "Pre-Register Allocation Live Interval Splitting",
-                false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting",
-                "Pre-Register Allocation Live Interval Splitting",
-                false, false)
-
-char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
-
-/// findSpillPoint - Find a gap as far away from the given MI that's suitable
-/// for spilling the current live interval. The index must be before any
-/// defs and uses of the live interval register in the mbb. Return begin() if
-/// none is found.
-MachineBasicBlock::iterator
-PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
-                                  MachineInstr *DefMI,
-                                  SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
-  MachineBasicBlock::iterator Pt = MBB->begin();
-
-  MachineBasicBlock::iterator MII = MI;
-  MachineBasicBlock::iterator EndPt = DefMI
-    ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
-    
-  while (MII != EndPt && !RefsInMBB.count(MII) &&
-         MII->getOpcode() != TRI->getCallFrameSetupOpcode())
-    --MII;
-  if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
-    
-  while (MII != EndPt && !RefsInMBB.count(MII)) {
-    // We can't insert the spill between the barrier (a call), and its
-    // corresponding call frame setup.
-    if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
-      while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
-        --MII;
-        if (MII == EndPt) {
-          return Pt;
-        }
-      }
-      continue;
-    } else {
-      Pt = MII;
-    }
-    
-    if (RefsInMBB.count(MII))
-      return Pt;
-    
-    
-    --MII;
-  }
-
-  return Pt;
-}
-
-/// findRestorePoint - Find a gap in the instruction index map that's suitable
-/// for restoring the current live interval value. The index must be before any
-/// uses of the live interval register in the mbb. Return end() if none is
-/// found.
-MachineBasicBlock::iterator
-PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
-                                    SlotIndex LastIdx,
-                                    SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
-  // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
-  // begin index accordingly.
-  MachineBasicBlock::iterator Pt = MBB->end();
-  MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
-
-  // We start at the call, so walk forward until we find the call frame teardown
-  // since we can't insert restores before that.  Bail if we encounter a use
-  // during this time.
-  MachineBasicBlock::iterator MII = MI;
-  if (MII == EndPt) return Pt;
-  
-  while (MII != EndPt && !RefsInMBB.count(MII) &&
-         MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
-    ++MII;
-  if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
-  ++MII;
-  
-  // FIXME: Limit the number of instructions to examine to reduce
-  // compile time?
-  while (MII != EndPt) {
-    SlotIndex Index = LIs->getInstructionIndex(MII);
-    if (Index > LastIdx)
-      break;
-      
-    // We can't insert a restore between the barrier (a call) and its 
-    // corresponding call frame teardown.
-    if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
-      do {
-        if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
-        ++MII;
-      } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
-    } else {
-      Pt = MII;
-    }
-    
-    if (RefsInMBB.count(MII))
-      return Pt;
-    
-    ++MII;
-  }
-
-  return Pt;
-}
-
-/// CreateSpillStackSlot - Create a stack slot for the live interval being
-/// split. If the live interval was previously split, just reuse the same
-/// slot.
-int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
-                                            const TargetRegisterClass *RC) {
-  int SS;
-  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
-  if (I != IntervalSSMap.end()) {
-    SS = I->second;
-  } else {
-    SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
-    IntervalSSMap[Reg] = SS;
-  }
-
-  // Create live interval for stack slot.
-  CurrSLI = &LSs->getOrCreateInterval(SS, RC);
-  if (CurrSLI->hasAtLeastOneValue())
-    CurrSValNo = CurrSLI->getValNumInfo(0);
-  else
-    CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
-                                       LSs->getVNInfoAllocator());
-  return SS;
-}
-
-/// IsAvailableInStack - Return true if register is available in a split stack
-/// slot at the specified index.
-bool
-PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
-                                    unsigned Reg, SlotIndex DefIndex,
-                                    SlotIndex RestoreIndex,
-                                    SlotIndex &SpillIndex,
-                                    int& SS) const {
-  if (!DefMBB)
-    return false;
-
-  DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
-  if (I == IntervalSSMap.end())
-    return false;
-  DenseMap<SlotIndex, SlotIndex>::const_iterator
-    II = Def2SpillMap.find(DefIndex);
-  if (II == Def2SpillMap.end())
-    return false;
-
-  // If last spill of def is in the same mbb as barrier mbb (where restore will
-  // be), make sure it's not below the intended restore index.
-  // FIXME: Undo the previous spill?
-  assert(LIs->getMBBFromIndex(II->second) == DefMBB);
-  if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
-    return false;
-
-  SS = I->second;
-  SpillIndex = II->second;
-  return true;
-}
-
-/// UpdateSpillSlotInterval - Given the specified val# of the register live
-/// interval being split, and the spill and restore indicies, update the live
-/// interval of the spill stack slot.
-void
-PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex,
-                                           SlotIndex RestoreIndex) {
-  assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
-         "Expect restore in the barrier mbb");
-
-  MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
-  if (MBB == BarrierMBB) {
-    // Intra-block spill + restore. We are done.
-    LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
-    CurrSLI->addRange(SLR);
-    return;
-  }
-
-  SmallPtrSet<MachineBasicBlock*, 4> Processed;
-  SlotIndex EndIdx = LIs->getMBBEndIdx(MBB);
-  LiveRange SLR(SpillIndex, EndIdx, CurrSValNo);
-  CurrSLI->addRange(SLR);
-  Processed.insert(MBB);
-
-  // Start from the spill mbb, figure out the extend of the spill slot's
-  // live interval.
-  SmallVector<MachineBasicBlock*, 4> WorkList;
-  const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
-  if (LR->end > EndIdx)
-    // If live range extend beyond end of mbb, add successors to work list.
-    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
-           SE = MBB->succ_end(); SI != SE; ++SI)
-      WorkList.push_back(*SI);
-
-  while (!WorkList.empty()) {
-    MachineBasicBlock *MBB = WorkList.back();
-    WorkList.pop_back();
-    if (Processed.count(MBB))
-      continue;
-    SlotIndex Idx = LIs->getMBBStartIdx(MBB);
-    LR = CurrLI->getLiveRangeContaining(Idx);
-    if (LR && LR->valno == ValNo) {
-      EndIdx = LIs->getMBBEndIdx(MBB);
-      if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
-        // Spill slot live interval stops at the restore.
-        LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
-        CurrSLI->addRange(SLR);
-      } else if (LR->end > EndIdx) {
-        // Live range extends beyond end of mbb, process successors.
-        LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo);
-        CurrSLI->addRange(SLR);
-        for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
-               SE = MBB->succ_end(); SI != SE; ++SI)
-          WorkList.push_back(*SI);
-      } else {
-        LiveRange SLR(Idx, LR->end, CurrSValNo);
-        CurrSLI->addRange(SLR);
-      }
-      Processed.insert(MBB);
-    }
-  }
-}
-
-/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
-/// construction algorithm to compute the ranges and valnos for an interval.
-VNInfo*
-PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
-                                       MachineBasicBlock* MBB, LiveInterval* LI,
-                                       SmallPtrSet<MachineInstr*, 4>& Visited,
-             DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
-             DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
-                                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
-                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
-                                 DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
-                                           bool IsTopLevel, bool IsIntraBlock) {
-  // Return memoized result if it's available.
-  if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
-    return NewVNs[UseI];
-  else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
-    return NewVNs[UseI];
-  else if (!IsIntraBlock && LiveOut.count(MBB))
-    return LiveOut[MBB];
-  
-  // Check if our block contains any uses or defs.
-  bool ContainsDefs = Defs.count(MBB);
-  bool ContainsUses = Uses.count(MBB);
-  
-  VNInfo* RetVNI = 0;
-  
-  // Enumerate the cases of use/def contaning blocks.
-  if (!ContainsDefs && !ContainsUses) {
-    return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
-                                          NewVNs, LiveOut, Phis,
-                                          IsTopLevel, IsIntraBlock);
-  } else if (ContainsDefs && !ContainsUses) {
-    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
-
-    // Search for the def in this block.  If we don't find it before the
-    // instruction we care about, go to the fallback case.  Note that that
-    // should never happen: this cannot be intrablock, so use should
-    // always be an end() iterator.
-    assert(UseI == MBB->end() && "No use marked in intrablock");
-    
-    MachineBasicBlock::iterator Walker = UseI;
-    --Walker;
-    while (Walker != MBB->begin()) {
-      if (BlockDefs.count(Walker))
-        break;
-      --Walker;
-    }
-    
-    // Once we've found it, extend its VNInfo to our instruction.
-    SlotIndex DefIndex = LIs->getInstructionIndex(Walker);
-    DefIndex = DefIndex.getDefIndex();
-    SlotIndex EndIndex = LIs->getMBBEndIdx(MBB);
-    
-    RetVNI = NewVNs[Walker];
-    LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI));
-  } else if (!ContainsDefs && ContainsUses) {
-    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
-    
-    // Search for the use in this block that precedes the instruction we care 
-    // about, going to the fallback case if we don't find it.    
-    MachineBasicBlock::iterator Walker = UseI;
-    bool found = false;
-    while (Walker != MBB->begin()) {
-      --Walker;
-      if (BlockUses.count(Walker)) {
-        found = true;
-        break;
-      }
-    }
-
-    if (!found)
-      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
-                                            Uses, NewVNs, LiveOut, Phis,
-                                            IsTopLevel, IsIntraBlock);
-
-    SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
-    UseIndex = UseIndex.getUseIndex();
-    SlotIndex EndIndex;
-    if (IsIntraBlock) {
-      EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
-    } else
-      EndIndex = LIs->getMBBEndIdx(MBB);
-
-    // Now, recursively phi construct the VNInfo for the use we found,
-    // and then extend it to include the instruction we care about
-    RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
-                                    NewVNs, LiveOut, Phis, false, true);
-    
-    LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
-    
-    // FIXME: Need to set kills properly for inter-block stuff.
-  } else if (ContainsDefs && ContainsUses) {
-    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
-    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
-    
-    // This case is basically a merging of the two preceding case, with the
-    // special note that checking for defs must take precedence over checking
-    // for uses, because of two-address instructions.
-    MachineBasicBlock::iterator Walker = UseI;
-    bool foundDef = false;
-    bool foundUse = false;
-    while (Walker != MBB->begin()) {
-      --Walker;
-      if (BlockDefs.count(Walker)) {
-        foundDef = true;
-        break;
-      } else if (BlockUses.count(Walker)) {
-        foundUse = true;
-        break;
-      }
-    }
-
-    if (!foundDef && !foundUse)
-      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
-                                            Uses, NewVNs, LiveOut, Phis,
-                                            IsTopLevel, IsIntraBlock);
-
-    SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
-    StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
-    SlotIndex EndIndex;
-    if (IsIntraBlock) {
-      EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
-    } else
-      EndIndex = LIs->getMBBEndIdx(MBB);
-
-    if (foundDef)
-      RetVNI = NewVNs[Walker];
-    else
-      RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
-                                      NewVNs, LiveOut, Phis, false, true);
-
-    LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
-  }
-  
-  // Memoize results so we don't have to recompute them.
-  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
-  else {
-    if (!NewVNs.count(UseI))
-      NewVNs[UseI] = RetVNI;
-    Visited.insert(UseI);
-  }
-
-  return RetVNI;
-}
-
-/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
-///
-VNInfo*
-PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
-                                       MachineBasicBlock* MBB, LiveInterval* LI,
-                                       SmallPtrSet<MachineInstr*, 4>& Visited,
-             DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
-             DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
-                                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
-                                 DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
-                                 DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
-                                           bool IsTopLevel, bool IsIntraBlock) {
-  // NOTE: Because this is the fallback case from other cases, we do NOT
-  // assume that we are not intrablock here.
-  if (Phis.count(MBB)) return Phis[MBB]; 
-
-  SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
-  VNInfo *RetVNI = Phis[MBB] =
-    LI->getNextValue(SlotIndex(), /*FIXME*/ 0,
-                     LIs->getVNInfoAllocator());
-
-  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
-    
-  // If there are no uses or defs between our starting point and the
-  // beginning of the block, then recursive perform phi construction
-  // on our predecessors.
-  DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
-  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
-         PE = MBB->pred_end(); PI != PE; ++PI) {
-    VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI, 
-                                              Visited, Defs, Uses, NewVNs,
-                                              LiveOut, Phis, false, false);
-    if (Incoming != 0)
-      IncomingVNs[*PI] = Incoming;
-  }
-    
-  if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
-    VNInfo* OldVN = RetVNI;
-    VNInfo* NewVN = IncomingVNs.begin()->second;
-    VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
-    if (MergedVN == OldVN) std::swap(OldVN, NewVN);
-    
-    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
-         LOE = LiveOut.end(); LOI != LOE; ++LOI)
-      if (LOI->second == OldVN)
-        LOI->second = MergedVN;
-    for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
-         NVE = NewVNs.end(); NVI != NVE; ++NVI)
-      if (NVI->second == OldVN)
-        NVI->second = MergedVN;
-    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
-         PE = Phis.end(); PI != PE; ++PI)
-      if (PI->second == OldVN)
-        PI->second = MergedVN;
-    RetVNI = MergedVN;
-  } else {
-    // Otherwise, merge the incoming VNInfos with a phi join.  Create a new
-    // VNInfo to represent the joined value.
-    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
-           IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
-      I->second->setHasPHIKill(true);
-    }
-  }
-      
-  SlotIndex EndIndex;
-  if (IsIntraBlock) {
-    EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
-  } else
-    EndIndex = LIs->getMBBEndIdx(MBB);
-  LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
-
-  // Memoize results so we don't have to recompute them.
-  if (!IsIntraBlock)
-    LiveOut[MBB] = RetVNI;
-  else {
-    if (!NewVNs.count(UseI))
-      NewVNs[UseI] = RetVNI;
-    Visited.insert(UseI);
-  }
-
-  return RetVNI;
-}
-
-/// ReconstructLiveInterval - Recompute a live interval from scratch.
-void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
-  VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator();
-  
-  // Clear the old ranges and valnos;
-  LI->clear();
-  
-  // Cache the uses and defs of the register
-  typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
-  RegMap Defs, Uses;
-  
-  // Keep track of the new VNs we're creating.
-  DenseMap<MachineInstr*, VNInfo*> NewVNs;
-  SmallPtrSet<VNInfo*, 2> PhiVNs;
-  
-  // Cache defs, and create a new VNInfo for each def.
-  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
-       DE = MRI->def_end(); DI != DE; ++DI) {
-    Defs[(*DI).getParent()].insert(&*DI);
-    
-    SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
-    DefIdx = DefIdx.getDefIndex();
-    
-    assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
-    VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
-    
-    // If the def is a move, set the copy field.
-    if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
-      NewVN->setCopy(&*DI);
-
-    NewVNs[&*DI] = NewVN;
-  }
-  
-  // Cache uses as a separate pass from actually processing them.
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
-       UE = MRI->use_end(); UI != UE; ++UI)
-    Uses[(*UI).getParent()].insert(&*UI);
-    
-  // Now, actually process every use and use a phi construction algorithm
-  // to walk from it to its reaching definitions, building VNInfos along
-  // the way.
-  DenseMap<MachineBasicBlock*, VNInfo*> LiveOut;
-  DenseMap<MachineBasicBlock*, VNInfo*> Phis;
-  SmallPtrSet<MachineInstr*, 4> Visited;
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
-       UE = MRI->use_end(); UI != UE; ++UI) {
-    PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs,
-                           Uses, NewVNs, LiveOut, Phis, true, true); 
-  }
-  
-  // Add ranges for dead defs
-  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
-       DE = MRI->def_end(); DI != DE; ++DI) {
-    SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
-    DefIdx = DefIdx.getDefIndex();
-    
-    if (LI->liveAt(DefIdx)) continue;
-    
-    VNInfo* DeadVN = NewVNs[&*DI];
-    LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
-  }
-}
-
-/// RenumberValno - Split the given valno out into a new vreg, allowing it to
-/// be allocated to a different register.  This function creates a new vreg,
-/// copies the valno and its live ranges over to the new vreg's interval,
-/// removes them from the old interval, and rewrites all uses and defs of
-/// the original reg to the new vreg within those ranges.
-void PreAllocSplitting::RenumberValno(VNInfo* VN) {
-  SmallVector<VNInfo*, 4> Stack;
-  SmallVector<VNInfo*, 4> VNsToCopy;
-  Stack.push_back(VN);
-
-  // Walk through and copy the valno we care about, and any other valnos
-  // that are two-address redefinitions of the one we care about.  These
-  // will need to be rewritten as well.  We also check for safety of the 
-  // renumbering here, by making sure that none of the valno involved has
-  // phi kills.
-  while (!Stack.empty()) {
-    VNInfo* OldVN = Stack.back();
-    Stack.pop_back();
-    
-    // Bail out if we ever encounter a valno that has a PHI kill.  We can't
-    // renumber these.
-    if (OldVN->hasPHIKill()) return;
-    
-    VNsToCopy.push_back(OldVN);
-    
-    // Locate two-address redefinitions
-    for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg),
-         DE = MRI->def_end(); DI != DE; ++DI) {
-      if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue;
-      SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex();
-      VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx);
-      if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) !=
-          VNsToCopy.end())
-        Stack.push_back(NextVN);
-    }
-  }
-  
-  // Create the new vreg
-  unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
-  
-  // Create the new live interval
-  LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
-  
-  for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE = 
-       VNsToCopy.end(); OI != OE; ++OI) {
-    VNInfo* OldVN = *OI;
-    
-    // Copy the valno over
-    VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
-    NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
-
-    // Remove the valno from the old interval
-    CurrLI->removeValNo(OldVN);
-  }
-  
-  // Rewrite defs and uses.  This is done in two stages to avoid invalidating
-  // the reg_iterator.
-  SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
-  
-  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
-         E = MRI->reg_end(); I != E; ++I) {
-    MachineOperand& MO = I.getOperand();
-    SlotIndex InstrIdx = LIs->getInstructionIndex(&*I);
-    
-    if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) ||
-        (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex())))
-      OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
-  }
-  
-  for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
-       OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
-    MachineInstr* Inst = I->first;
-    unsigned OpIdx = I->second;
-    MachineOperand& MO = Inst->getOperand(OpIdx);
-    MO.setReg(NewVReg);
-  }
-  
-  // Grow the VirtRegMap, since we've created a new vreg.
-  VRM->grow();
-  
-  // The renumbered vreg shares a stack slot with the old register.
-  if (IntervalSSMap.count(CurrLI->reg))
-    IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
-  
-  ++NumRenumbers;
-}
-
-bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
-                                      MachineInstr* DefMI,
-                                      MachineBasicBlock::iterator RestorePt,
-                                    SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
-  MachineBasicBlock& MBB = *RestorePt->getParent();
-  
-  MachineBasicBlock::iterator KillPt = BarrierMBB->end();
-  if (!DefMI || DefMI->getParent() == BarrierMBB)
-    KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
-  else
-    KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
-  
-  if (KillPt == DefMI->getParent()->end())
-    return false;
-  
-  TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI);
-  SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
-  
-  ReconstructLiveInterval(CurrLI);
-  RematIdx = RematIdx.getDefIndex();
-  RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
-  
-  ++NumSplits;
-  ++NumRemats;
-  return true;  
-}
-
-MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, 
-                                           const TargetRegisterClass* RC,
-                                           MachineInstr* DefMI,
-                                           MachineInstr* Barrier,
-                                           MachineBasicBlock* MBB,
-                                           int& SS,
-                                    SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
-  // Go top down if RefsInMBB is empty.
-  if (RefsInMBB.empty())
-    return 0;
-  
-  MachineBasicBlock::iterator FoldPt = Barrier;
-  while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
-         !RefsInMBB.count(FoldPt))
-    --FoldPt;
-  
-  int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg);
-  if (OpIdx == -1)
-    return 0;
-  
-  SmallVector<unsigned, 1> Ops;
-  Ops.push_back(OpIdx);
-  
-  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
-    return 0;
-  
-  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
-  if (I != IntervalSSMap.end()) {
-    SS = I->second;
-  } else {
-    SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
-  }
-  
-  MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
-  
-  if (FMI) {
-    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
-    FoldPt->eraseFromParent();
-    ++NumFolds;
-    
-    IntervalSSMap[vreg] = SS;
-    CurrSLI = &LSs->getOrCreateInterval(SS, RC);
-    if (CurrSLI->hasAtLeastOneValue())
-      CurrSValNo = CurrSLI->getValNumInfo(0);
-    else
-      CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
-                                         LSs->getVNInfoAllocator());
-  }
-  
-  return FMI;
-}
-
-MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, 
-                                             const TargetRegisterClass* RC,
-                                             MachineInstr* Barrier,
-                                             MachineBasicBlock* MBB,
-                                             int SS,
-                                     SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
-  if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
-    return 0;
-                                       
-  // Go top down if RefsInMBB is empty.
-  if (RefsInMBB.empty())
-    return 0;
-  
-  // Can't fold a restore between a call stack setup and teardown.
-  MachineBasicBlock::iterator FoldPt = Barrier;
-  
-  // Advance from barrier to call frame teardown.
-  while (FoldPt != MBB->getFirstTerminator() &&
-         FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
-    if (RefsInMBB.count(FoldPt))
-      return 0;
-    
-    ++FoldPt;
-  }
-  
-  if (FoldPt == MBB->getFirstTerminator())
-    return 0;
-  else
-    ++FoldPt;
-  
-  // Now find the restore point.
-  while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
-    if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
-      while (FoldPt != MBB->getFirstTerminator() &&
-             FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
-        if (RefsInMBB.count(FoldPt))
-          return 0;
-        
-        ++FoldPt;
-      }
-      
-      if (FoldPt == MBB->getFirstTerminator())
-        return 0;
-    } 
-    
-    ++FoldPt;
-  }
-  
-  if (FoldPt == MBB->getFirstTerminator())
-    return 0;
-  
-  int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
-  if (OpIdx == -1)
-    return 0;
-  
-  SmallVector<unsigned, 1> Ops;
-  Ops.push_back(OpIdx);
-  
-  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
-    return 0;
-  
-  MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
-  
-  if (FMI) {
-    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
-    FoldPt->eraseFromParent();
-    ++NumRestoreFolds;
-  }
-  
-  return FMI;
-}
-
-/// SplitRegLiveInterval - Split (spill and restore) the given live interval
-/// so it would not cross the barrier that's being processed. Shrink wrap
-/// (minimize) the live interval to the last uses.
-bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
-  DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
-               << "  result: ");
-
-  CurrLI = LI;
-
-  // Find live range where current interval cross the barrier.
-  LiveInterval::iterator LR =
-    CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex());
-  VNInfo *ValNo = LR->valno;
-
-  assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
-
-  MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def);
-
-  // If this would create a new join point, do not split.
-  if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
-    DEBUG(dbgs() << "FAILED (would create a new join point).\n");
-    return false;
-  }
-
-  // Find all references in the barrier mbb.
-  SmallPtrSet<MachineInstr*, 4> RefsInMBB;
-  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
-         E = MRI->reg_end(); I != E; ++I) {
-    MachineInstr *RefMI = &*I;
-    if (RefMI->getParent() == BarrierMBB)
-      RefsInMBB.insert(RefMI);
-  }
-
-  // Find a point to restore the value after the barrier.
-  MachineBasicBlock::iterator RestorePt =
-    findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
-  if (RestorePt == BarrierMBB->end()) {
-    DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
-    return false;
-  }
-
-  if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
-    if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
-      DEBUG(dbgs() << "success (remat).\n");
-      return true;
-    }
-
-  // Add a spill either before the barrier or after the definition.
-  MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
-  const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
-  SlotIndex SpillIndex;
-  MachineInstr *SpillMI = NULL;
-  int SS = -1;
-  if (!DefMI) {
-    // If we don't know where the def is we must split just before the barrier.
-    if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
-                            BarrierMBB, SS, RefsInMBB))) {
-      SpillIndex = LIs->getInstructionIndex(SpillMI);
-    } else {
-      MachineBasicBlock::iterator SpillPt = 
-        findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
-      if (SpillPt == BarrierMBB->begin()) {
-        DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
-        return false; // No gap to insert spill.
-      }
-      // Add spill.
-    
-      SS = CreateSpillStackSlot(CurrLI->reg, RC);
-      TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC,
-                               TRI);
-      SpillMI = prior(SpillPt);
-      SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
-    }
-  } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
-                                 LIs->getZeroIndex(), SpillIndex, SS)) {
-    // If it's already split, just restore the value. There is no need to spill
-    // the def again.
-    if (!DefMI) {
-      DEBUG(dbgs() << "FAILED (def is dead).\n");
-      return false; // Def is dead. Do nothing.
-    }
-    
-    if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
-                             BarrierMBB, SS, RefsInMBB))) {
-      SpillIndex = LIs->getInstructionIndex(SpillMI);
-    } else {
-      // Check if it's possible to insert a spill after the def MI.
-      MachineBasicBlock::iterator SpillPt;
-      if (DefMBB == BarrierMBB) {
-        // Add spill after the def and the last use before the barrier.
-        SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
-                                 RefsInMBB);
-        if (SpillPt == DefMBB->begin()) {
-          DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
-          return false; // No gap to insert spill.
-        }
-      } else {
-        SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
-        if (SpillPt == DefMBB->end()) {
-          DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
-          return false; // No gap to insert spill.
-        }
-      }
-      // Add spill. 
-      SS = CreateSpillStackSlot(CurrLI->reg, RC);
-      TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC,
-                               TRI);
-      SpillMI = prior(SpillPt);
-      SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
-    }
-  }
-
-  // Remember def instruction index to spill index mapping.
-  if (DefMI && SpillMI)
-    Def2SpillMap[ValNo->def] = SpillIndex;
-
-  // Add restore.
-  bool FoldedRestore = false;
-  SlotIndex RestoreIndex;
-  if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
-                                      BarrierMBB, SS, RefsInMBB)) {
-    RestorePt = LMI;
-    RestoreIndex = LIs->getInstructionIndex(RestorePt);
-    FoldedRestore = true;
-  } else {
-    TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI);
-    MachineInstr *LoadMI = prior(RestorePt);
-    RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
-  }
-
-  // Update spill stack slot live interval.
-  UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(),
-                          RestoreIndex.getDefIndex());
-
-  ReconstructLiveInterval(CurrLI);
-
-  if (!FoldedRestore) {
-    SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
-    RestoreIdx = RestoreIdx.getDefIndex();
-    RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx));
-  }
-  
-  ++NumSplits;
-  DEBUG(dbgs() << "success.\n");
-  return true;
-}
-
-/// SplitRegLiveIntervals - Split all register live intervals that cross the
-/// barrier that's being processed.
-bool
-PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
-                                         SmallPtrSet<LiveInterval*, 8>& Split) {
-  // First find all the virtual registers whose live intervals are intercepted
-  // by the current barrier.
-  SmallVector<LiveInterval*, 8> Intervals;
-  for (const TargetRegisterClass **RC = RCs; *RC; ++RC) {
-    // FIXME: If it's not safe to move any instruction that defines the barrier
-    // register class, then it means there are some special dependencies which
-    // codegen is not modelling. Ignore these barriers for now.
-    if (!TII->isSafeToMoveRegClassDefs(*RC))
-      continue;
-    const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
-    for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
-      unsigned Reg = VRs[i];
-      if (!LIs->hasInterval(Reg))
-        continue;
-      LiveInterval *LI = &LIs->getInterval(Reg);
-      if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg))
-        // Virtual register live interval is intercepted by the barrier. We
-        // should split and shrink wrap its interval if possible.
-        Intervals.push_back(LI);
-    }
-  }
-
-  // Process the affected live intervals.
-  bool Change = false;
-  while (!Intervals.empty()) {
-    if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
-      break;
-    LiveInterval *LI = Intervals.back();
-    Intervals.pop_back();
-    bool result = SplitRegLiveInterval(LI);
-    if (result) Split.insert(LI);
-    Change |= result;
-  }
-
-  return Change;
-}
-
-unsigned PreAllocSplitting::getNumberOfNonSpills(
-                                  SmallPtrSet<MachineInstr*, 4>& MIs,
-                                  unsigned Reg, int FrameIndex,
-                                  bool& FeedsTwoAddr) {
-  unsigned NonSpills = 0;
-  for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
-       UI != UE; ++UI) {
-    int StoreFrameIndex;
-    unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
-    if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
-      ++NonSpills;
-    
-    int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
-    if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
-      FeedsTwoAddr = true;
-  }
-  
-  return NonSpills;
-}
-
-/// removeDeadSpills - After doing splitting, filter through all intervals we've
-/// split, and see if any of the spills are unnecessary.  If so, remove them.
-bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
-  bool changed = false;
-  
-  // Walk over all of the live intervals that were touched by the splitter,
-  // and see if we can do any DCE and/or folding.
-  for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
-       LE = split.end(); LI != LE; ++LI) {
-    DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
-    
-    // First, collect all the uses of the vreg, and sort them by their
-    // reaching definition (VNInfo).
-    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
-         UE = MRI->use_end(); UI != UE; ++UI) {
-      SlotIndex index = LIs->getInstructionIndex(&*UI);
-      index = index.getUseIndex();
-      
-      const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
-      VNUseCount[LR->valno].insert(&*UI);
-    }
-    
-    // Now, take the definitions (VNInfo's) one at a time and try to DCE 
-    // and/or fold them away.
-    for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
-         VE = (*LI)->vni_end(); VI != VE; ++VI) {
-      
-      if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit) 
-        return changed;
-      
-      VNInfo* CurrVN = *VI;
-      
-      // We don't currently try to handle definitions with PHI kills, because
-      // it would involve processing more than one VNInfo at once.
-      if (CurrVN->hasPHIKill()) continue;
-      
-      // We also don't try to handle the results of PHI joins, since there's
-      // no defining instruction to analyze.
-      MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
-      if (!DefMI || CurrVN->isUnused()) continue;
-    
-      // We're only interested in eliminating cruft introduced by the splitter,
-      // is of the form load-use or load-use-store.  First, check that the
-      // definition is a load, and remember what stack slot we loaded it from.
-      int FrameIndex;
-      if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
-      
-      // If the definition has no uses at all, just DCE it.
-      if (VNUseCount[CurrVN].size() == 0) {
-        LIs->RemoveMachineInstrFromMaps(DefMI);
-        (*LI)->removeValNo(CurrVN);
-        DefMI->eraseFromParent();
-        VNUseCount.erase(CurrVN);
-        ++NumDeadSpills;
-        changed = true;
-        continue;
-      }
-      
-      // Second, get the number of non-store uses of the definition, as well as
-      // a flag indicating whether it feeds into a later two-address definition.
-      bool FeedsTwoAddr = false;
-      unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
-                                                    (*LI)->reg, FrameIndex,
-                                                    FeedsTwoAddr);
-      
-      // If there's one non-store use and it doesn't feed a two-addr, then
-      // this is a load-use-store case that we can try to fold.
-      if (NonSpillCount == 1 && !FeedsTwoAddr) {
-        // Start by finding the non-store use MachineInstr.
-        SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
-        int StoreFrameIndex;
-        unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
-        while (UI != VNUseCount[CurrVN].end() &&
-               (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
-          ++UI;
-          if (UI != VNUseCount[CurrVN].end())
-            StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
-        }
-        if (UI == VNUseCount[CurrVN].end()) continue;
-        
-        MachineInstr* use = *UI;
-        
-        // Attempt to fold it away!
-        int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
-        if (OpIdx == -1) continue;
-        SmallVector<unsigned, 1> Ops;
-        Ops.push_back(OpIdx);
-        if (!TII->canFoldMemoryOperand(use, Ops)) continue;
-
-        MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
-
-        if (!NewMI) continue;
-
-        // Update relevant analyses.
-        LIs->RemoveMachineInstrFromMaps(DefMI);
-        LIs->ReplaceMachineInstrInMaps(use, NewMI);
-        (*LI)->removeValNo(CurrVN);
-
-        DefMI->eraseFromParent();
-        use->eraseFromParent();
-        VNUseCount[CurrVN].erase(use);
-
-        // Remove deleted instructions.  Note that we need to remove them from 
-        // the VNInfo->use map as well, just to be safe.
-        for (SmallPtrSet<MachineInstr*, 4>::iterator II = 
-             VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
-             II != IE; ++II) {
-          for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
-               VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE; 
-               ++VNI)
-            if (VNI->first != CurrVN)
-              VNI->second.erase(*II);
-          LIs->RemoveMachineInstrFromMaps(*II);
-          (*II)->eraseFromParent();
-        }
-        
-        VNUseCount.erase(CurrVN);
-
-        for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
-             VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
-          if (VI->second.erase(use))
-            VI->second.insert(NewMI);
-
-        ++NumDeadSpills;
-        changed = true;
-        continue;
-      }
-      
-      // If there's more than one non-store instruction, we can't profitably
-      // fold it, so bail.
-      if (NonSpillCount) continue;
-        
-      // Otherwise, this is a load-store case, so DCE them.
-      for (SmallPtrSet<MachineInstr*, 4>::iterator UI = 
-           VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
-           UI != UE; ++UI) {
-        LIs->RemoveMachineInstrFromMaps(*UI);
-        (*UI)->eraseFromParent();
-      }
-        
-      VNUseCount.erase(CurrVN);
-        
-      LIs->RemoveMachineInstrFromMaps(DefMI);
-      (*LI)->removeValNo(CurrVN);
-      DefMI->eraseFromParent();
-      ++NumDeadSpills;
-      changed = true;
-    }
-  }
-  
-  return changed;
-}
-
-bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
-                                       MachineBasicBlock* DefMBB,
-                                       MachineBasicBlock* BarrierMBB) {
-  if (DefMBB == BarrierMBB)
-    return false;
-  
-  if (LR->valno->hasPHIKill())
-    return false;
-  
-  SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
-  if (LR->end < MBBEnd)
-    return false;
-  
-  MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
-  if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
-    return true;
-  
-  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
-  SmallPtrSet<MachineBasicBlock*, 4> Visited;
-  typedef std::pair<MachineBasicBlock*,
-                    MachineBasicBlock::succ_iterator> ItPair;
-  SmallVector<ItPair, 4> Stack;
-  Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
-  
-  while (!Stack.empty()) {
-    ItPair P = Stack.back();
-    Stack.pop_back();
-    
-    MachineBasicBlock* PredMBB = P.first;
-    MachineBasicBlock::succ_iterator S = P.second;
-    
-    if (S == PredMBB->succ_end())
-      continue;
-    else if (Visited.count(*S)) {
-      Stack.push_back(std::make_pair(PredMBB, ++S));
-      continue;
-    } else
-      Stack.push_back(std::make_pair(PredMBB, S+1));
-    
-    MachineBasicBlock* MBB = *S;
-    Visited.insert(MBB);
-    
-    if (MBB == BarrierMBB)
-      return true;
-    
-    MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
-    MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
-    MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
-    while (MDTN) {
-      if (MDTN == DefMDTN)
-        return true;
-      else if (MDTN == BarrierMDTN)
-        break;
-      MDTN = MDTN->getIDom();
-    }
-    
-    MBBEnd = LIs->getMBBEndIdx(MBB);
-    if (LR->end > MBBEnd)
-      Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
-  }
-  
-  return false;
-} 
-  
-
-bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
-  CurrMF = &MF;
-  TM     = &MF.getTarget();
-  TRI    = TM->getRegisterInfo();
-  TII    = TM->getInstrInfo();
-  MFI    = MF.getFrameInfo();
-  MRI    = &MF.getRegInfo();
-  SIs    = &getAnalysis<SlotIndexes>();
-  LIs    = &getAnalysis<LiveIntervals>();
-  LSs    = &getAnalysis<LiveStacks>();
-  VRM    = &getAnalysis<VirtRegMap>();
-
-  bool MadeChange = false;
-
-  // Make sure blocks are numbered in order.
-  MF.RenumberBlocks();
-
-  MachineBasicBlock *Entry = MF.begin();
-  SmallPtrSet<MachineBasicBlock*,16> Visited;
-
-  SmallPtrSet<LiveInterval*, 8> Split;
-
-  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
-         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
-       DFI != E; ++DFI) {
-    BarrierMBB = *DFI;
-    for (MachineBasicBlock::iterator I = BarrierMBB->begin(),
-           E = BarrierMBB->end(); I != E; ++I) {
-      Barrier = &*I;
-      const TargetRegisterClass **BarrierRCs =
-        Barrier->getDesc().getRegClassBarriers();
-      if (!BarrierRCs)
-        continue;
-      BarrierIdx = LIs->getInstructionIndex(Barrier);
-      MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split);
-    }
-  }
-
-  MadeChange |= removeDeadSpills(Split);
-
-  return MadeChange;
-}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index f1f3c9969cc8..a901c5fefa3e 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -145,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
 /// pseudo instructions.
 void PEI::calculateCallsInformation(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
   const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
   MachineFrameInfo *MFI = Fn.getFrameInfo();
 
@@ -152,8 +153,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
   bool AdjustsStack = MFI->adjustsStack();
 
   // Get the function call frame set-up and tear-down instruction opcode
-  int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
-  int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+  int FrameSetupOpcode   = TII.getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
 
   // Early exit for targets which have no call frame setup/destroy pseudo
   // instructions.
@@ -705,12 +706,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
 
   const TargetMachine &TM = Fn.getTarget();
   assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+  const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
   const TargetFrameLowering *TFI = TM.getFrameLowering();
   bool StackGrowsDown =
     TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
-  int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
-  int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
+  int FrameSetupOpcode   = TII.getCallFrameSetupOpcode();
+  int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
 
   for (MachineFunction::iterator BB = Fn.begin(),
          E = Fn.end(); BB != E; ++BB) {
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 1d77b29e2a2e..5ea26adc7644 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -20,6 +20,7 @@
 #include "RenderMachineFunction.h"
 #include "Spiller.h"
 #include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -34,7 +35,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -141,7 +141,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
-  initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
   initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -324,19 +324,21 @@ void RegAllocBase::allocatePhysRegs() {
 
     if (AvailablePhysReg == ~0u) {
       // selectOrSplit failed to find a register!
-      std::string msg;
-      raw_string_ostream Msg(msg);
-      Msg << "Ran out of registers during register allocation!"
-             "\nCannot allocate: " << *VirtReg;
+      const char *Msg = "ran out of registers during register allocation";
+      // Probably caused by an inline asm.
+      MachineInstr *MI;
       for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
-      MachineInstr *MI = I.skipInstruction();) {
-        if (!MI->isInlineAsm())
-          continue;
-        Msg << "\nPlease check your inline asm statement for "
-          "invalid constraints:\n";
-        MI->print(Msg, &VRM->getMachineFunction().getTarget());
-      }
-      report_fatal_error(Msg.str());
+           (MI = I.skipInstruction());)
+        if (MI->isInlineAsm())
+          break;
+      if (MI)
+        MI->emitError(Msg);
+      else
+        report_fatal_error(Msg);
+      // Keep going after reporting the error.
+      VRM->assignVirt2Phys(VirtReg->reg,
+                 RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+      continue;
     }
 
     if (AvailablePhysReg)
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 97652036f988..b36a445291b7 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -86,7 +86,7 @@ namespace {
     // that is currently available in a physical register.
     LiveRegMap LiveVirtRegs;
 
-    DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+    DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
 
     // RegState - Track the state of a physical register.
     enum RegState {
@@ -118,7 +118,7 @@ namespace {
     // SkippedInstrs - Descriptors of instructions whose clobber list was
     // ignored because all registers were spilled. It is still necessary to
     // mark all the clobbered registers as used by the function.
-    SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+    SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
 
     // isBulkSpilling - This flag is set when LiveRegMap will be cleared
     // completely after spilling all live registers. LiveRegMap entries should
@@ -272,7 +272,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
     // If this register is used by DBG_VALUE then insert new DBG_VALUE to
     // identify spilled location as the place to find corresponding variable's
     // value.
-    if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
+    SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+    for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+      MachineInstr *DBG = LRIDbgValues[li];
       const MDNode *MDPtr =
         DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
       int64_t Offset = 0;
@@ -291,9 +293,11 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
         MachineBasicBlock *MBB = DBG->getParent();
         MBB->insert(MI, NewDV);
         DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
-        LiveDbgValueMap[LRI->first] = NewDV;
       }
     }
+    // Now that this register is spilled, there should not be any DBG_VALUE
+    // pointing to it, because they all point to the spilled value now.
+    LRIDbgValues.clear();
     if (SpillKill)
       LR.LastUse = 0; // Don't kill register again
   }
@@ -419,7 +423,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
 // Returns spillImpossible when PhysReg or an alias can't be spilled.
 unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
   if (UsedInInstr.test(PhysReg)) {
-    DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n");
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
     return spillImpossible;
   }
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -428,15 +432,15 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
   case regFree:
     return 0;
   case regReserved:
-    DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: "
-          << PhysReg << " is reserved already.\n");
+    DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+                 << PrintReg(PhysReg, TRI) << " is reserved already.\n");
     return spillImpossible;
   default:
     return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
   }
 
   // This is a disabled register, add up cost of aliases.
-  DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n");
+  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
   unsigned Cost = 0;
   for (const unsigned *AS = TRI->getAliasSet(PhysReg);
        unsigned Alias = *AS; ++AS) {
@@ -487,14 +491,12 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
 
   // Take hint when possible.
   if (Hint) {
-    switch(calcSpillCost(Hint)) {
-    default:
-      definePhysReg(MI, Hint, regFree);
-      // Fall through.
-    case 0:
+    // Ignore the hint if we would have to spill a dirty register.
+    unsigned Cost = calcSpillCost(Hint);
+    if (Cost < spillDirty) {
+      if (Cost)
+        definePhysReg(MI, Hint, regFree);
       return assignVirtToPhysReg(LRE, Hint);
-    case spillImpossible:
-      break;
     }
   }
 
@@ -513,7 +515,7 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
   unsigned BestReg = 0, BestCost = spillImpossible;
   for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
     unsigned Cost = calcSpillCost(*I);
-    DEBUG(dbgs() << "\tRegister: " << *I << "\n");
+    DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
     DEBUG(dbgs() << "\tCost: " << Cost << "\n");
     DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
     // Cost is 0 when all aliases are already disabled.
@@ -528,16 +530,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
     return assignVirtToPhysReg(LRE, BestReg);
   }
 
-  // Nothing we can do.
-  std::string msg;
-  raw_string_ostream Msg(msg);
-  Msg << "Ran out of registers during register allocation!";
-  if (MI->isInlineAsm()) {
-    Msg << "\nPlease check your inline asm statement for "
-        << "invalid constraints:\n";
-    MI->print(Msg, TM);
-  }
-  report_fatal_error(Msg.str());
+  // Nothing we can do. Report an error and keep going with a bad allocation.
+  MI->emitError("ran out of registers during register allocation");
+  definePhysReg(MI, *AO.begin(), regFree);
+  assignVirtToPhysReg(LRE, *AO.begin());
 }
 
 /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
@@ -724,7 +720,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
     if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
     unsigned Reg = MO.getReg();
     if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
-    DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n");
+    DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+                 << " as used in instr\n");
     UsedInInstr.set(Reg);
   }
 
@@ -774,7 +771,7 @@ void RAFast::AllocateBasicBlock() {
   // Otherwise, sequentially allocate each instruction in the MBB.
   while (MII != MBB->end()) {
     MachineInstr *MI = MII++;
-    const TargetInstrDesc &TID = MI->getDesc();
+    const MCInstrDesc &MCID = MI->getDesc();
     DEBUG({
         dbgs() << "\n>> " << *MI << "Regs:";
         for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
@@ -818,7 +815,7 @@ void RAFast::AllocateBasicBlock() {
           if (!MO.isReg()) continue;
           unsigned Reg = MO.getReg();
           if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
-          LiveDbgValueMap[Reg] = MI;
+          LiveDbgValueMap[Reg].push_back(MI);
           LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
           if (LRI != LiveVirtRegs.end())
             setPhysReg(MI, i, LRI->second.PhysReg);
@@ -887,7 +884,7 @@ void RAFast::AllocateBasicBlock() {
         VirtOpEnd = i+1;
         if (MO.isUse()) {
           hasTiedOps = hasTiedOps ||
-                                TID.getOperandConstraint(i, TOI::TIED_TO) != -1;
+                              MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
         } else {
           if (MO.isEarlyClobber())
             hasEarlyClobbers = true;
@@ -917,7 +914,7 @@ void RAFast::AllocateBasicBlock() {
     // We didn't detect inline asm tied operands above, so just make this extra
     // pass for all inline asm.
     if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
-        (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) {
+        (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
       handleThroughOperands(MI, VirtDead);
       // Don't attempt coalescing when we have funny stuff going on.
       CopyDst = 0;
@@ -962,7 +959,7 @@ void RAFast::AllocateBasicBlock() {
     }
 
     unsigned DefOpEnd = MI->getNumOperands();
-    if (TID.isCall()) {
+    if (MCID.isCall()) {
       // Spill all virtregs before a call. This serves two purposes: 1. If an
       // exception is thrown, the landing pad is going to expect to find
       // registers in their spill slots, and 2. we don't have to wade through
@@ -973,7 +970,7 @@ void RAFast::AllocateBasicBlock() {
 
       // The imp-defs are skipped below, but we still need to mark those
       // registers as used by the function.
-      SkippedInstrs.insert(&TID);
+      SkippedInstrs.insert(&MCID);
     }
 
     // Third scan.
@@ -1059,7 +1056,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
   MRI->closePhysRegsUsed(*TRI);
 
   // Add the clobber lists for all the instructions we skipped earlier.
-  for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator
+  for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
        I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
     if (const unsigned *Defs = (*I)->getImplicitDefs())
       while (*Defs)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 8d0632567bb1..e235e87b54f3 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -22,6 +22,7 @@
 #include "SpillPlacement.h"
 #include "SplitKit.h"
 #include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Function.h"
@@ -33,11 +34,9 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineLoopRanges.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -68,7 +67,6 @@ class RAGreedy : public MachineFunctionPass,
   LiveStacks *LS;
   MachineDominatorTree *DomTree;
   MachineLoopInfo *Loops;
-  MachineLoopRanges *LoopRanges;
   EdgeBundles *Bundles;
   SpillPlacement *SpillPlacer;
   LiveDebugVariables *DebugVars;
@@ -76,6 +74,7 @@ class RAGreedy : public MachineFunctionPass,
   // state
   std::auto_ptr<Spiller> SpillerInstance;
   std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+  unsigned NextCascade;
 
   // Live ranges pass through a number of stages as we try to allocate them.
   // Some of the stages may also create new live ranges:
@@ -101,29 +100,49 @@ class RAGreedy : public MachineFunctionPass,
 
   static const char *const StageName[];
 
-  IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage;
+  // RegInfo - Keep additional information about each live range.
+  struct RegInfo {
+    LiveRangeStage Stage;
+
+    // Cascade - Eviction loop prevention. See canEvictInterference().
+    unsigned Cascade;
+
+    RegInfo() : Stage(RS_New), Cascade(0) {}
+  };
+
+  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
 
   LiveRangeStage getStage(const LiveInterval &VirtReg) const {
-    return LiveRangeStage(LRStage[VirtReg.reg]);
+    return ExtraRegInfo[VirtReg.reg].Stage;
+  }
+
+  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+    ExtraRegInfo.resize(MRI->getNumVirtRegs());
+    ExtraRegInfo[VirtReg.reg].Stage = Stage;
   }
 
   template<typename Iterator>
   void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
-    LRStage.resize(MRI->getNumVirtRegs());
+    ExtraRegInfo.resize(MRI->getNumVirtRegs());
     for (;Begin != End; ++Begin) {
       unsigned Reg = (*Begin)->reg;
-      if (LRStage[Reg] == RS_New)
-        LRStage[Reg] = NewStage;
+      if (ExtraRegInfo[Reg].Stage == RS_New)
+        ExtraRegInfo[Reg].Stage = NewStage;
     }
   }
 
-  // Eviction. Sometimes an assigned live range can be evicted without
-  // conditions, but other times it must be split after being evicted to avoid
-  // infinite loops.
-  enum CanEvict {
-    CE_Never,    ///< Can never evict.
-    CE_Always,   ///< Can always evict.
-    CE_WithSplit ///< Can evict only if range is also split or spilled.
+  /// Cost of evicting interference.
+  struct EvictionCost {
+    unsigned BrokenHints; ///< Total number of broken hints.
+    float MaxWeight;      ///< Maximum spill weight evicted.
+
+    EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+    bool operator<(const EvictionCost &O) const {
+      if (BrokenHints != O.BrokenHints)
+        return BrokenHints < O.BrokenHints;
+      return MaxWeight < O.MaxWeight;
+    }
   };
 
   // splitting state.
@@ -139,11 +158,13 @@ class RAGreedy : public MachineFunctionPass,
   /// Global live range splitting candidate info.
   struct GlobalSplitCandidate {
     unsigned PhysReg;
+    InterferenceCache::Cursor Intf;
     BitVector LiveBundles;
     SmallVector<unsigned, 8> ActiveBlocks;
 
-    void reset(unsigned Reg) {
+    void reset(InterferenceCache &Cache, unsigned Reg) {
       PhysReg = Reg;
+      Intf.setPhysReg(Cache, Reg);
       LiveBundles.clear();
       ActiveBlocks.clear();
     }
@@ -185,13 +206,15 @@ private:
   float calcSpillCost();
   bool addSplitConstraints(InterferenceCache::Cursor, float&);
   void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
-  void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
-  float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+  void growRegion(GlobalSplitCandidate &Cand);
+  float calcGlobalSplitCost(GlobalSplitCandidate&);
   void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
                          SmallVectorImpl<LiveInterval*>&);
   void calcGapWeights(unsigned, SmallVectorImpl<float>&);
-  CanEvict canEvict(LiveInterval &A, LiveInterval &B);
-  bool canEvictInterference(LiveInterval&, unsigned, float&);
+  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+  void evictInterference(LiveInterval&, unsigned,
+                         SmallVectorImpl<LiveInterval*>&);
 
   unsigned tryAssign(LiveInterval&, AllocationOrder&,
                      SmallVectorImpl<LiveInterval*>&);
@@ -228,18 +251,17 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
   return new RAGreedy();
 }
 
-RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) {
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
   initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
   initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
   initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
-  initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
   initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
   initializeLiveStacksPass(*PassRegistry::getPassRegistry());
   initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
   initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
-  initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
   initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
   initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
   initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
@@ -264,8 +286,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<MachineLoopInfo>();
   AU.addPreserved<MachineLoopInfo>();
-  AU.addRequired<MachineLoopRanges>();
-  AU.addPreserved<MachineLoopRanges>();
   AU.addRequired<VirtRegMap>();
   AU.addPreserved<VirtRegMap>();
   AU.addRequired<EdgeBundles>();
@@ -308,13 +328,13 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
   // LRE may clone a virtual register because dead code elimination causes it to
   // be split into connected components. Ensure that the new register gets the
   // same stage as the parent.
-  LRStage.grow(New);
-  LRStage[New] = LRStage[Old];
+  ExtraRegInfo.grow(New);
+  ExtraRegInfo[New] = ExtraRegInfo[Old];
 }
 
 void RAGreedy::releaseMemory() {
   SpillerInstance.reset(0);
-  LRStage.clear();
+  ExtraRegInfo.clear();
   GlobalCand.clear();
   RegAllocBase::releaseMemory();
 }
@@ -328,11 +348,11 @@ void RAGreedy::enqueue(LiveInterval *LI) {
          "Can only enqueue virtual registers");
   unsigned Prio;
 
-  LRStage.grow(Reg);
-  if (LRStage[Reg] == RS_New)
-    LRStage[Reg] = RS_First;
+  ExtraRegInfo.grow(Reg);
+  if (ExtraRegInfo[Reg].Stage == RS_New)
+    ExtraRegInfo[Reg].Stage = RS_First;
 
-  if (LRStage[Reg] == RS_Second)
+  if (ExtraRegInfo[Reg].Stage == RS_Second)
     // Unsplit ranges that couldn't be allocated immediately are deferred until
     // everything else has been allocated. Long ranges are allocated last so
     // they are split against realistic interference.
@@ -375,7 +395,21 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   if (!PhysReg || Order.isHint(PhysReg))
     return PhysReg;
 
-  // PhysReg is available. Try to evict interference from a cheaper alternative.
+  // PhysReg is available, but there may be a better choice.
+
+  // If we missed a simple hint, try to cheaply evict interference from the
+  // preferred register.
+  if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+    if (Order.isHint(Hint)) {
+      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+      EvictionCost MaxCost(1);
+      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+        evictInterference(VirtReg, Hint, NewVRegs);
+        return Hint;
+      }
+    }
+
+  // Try to evict interference from a cheaper alternative.
   unsigned Cost = TRI->getCostPerUse(PhysReg);
 
   // Most registers have 0 additional cost.
@@ -393,31 +427,58 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
 //                         Interference eviction
 //===----------------------------------------------------------------------===//
 
-/// canEvict - determine if A can evict the assigned live range B. The eviction
-/// policy defined by this function together with the allocation order defined
-/// by enqueue() decides which registers ultimately end up being split and
-/// spilled.
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
 ///
-/// This function must define a non-circular relation when it returns CE_Always,
-/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second
-/// range, it is possible to return CE_WithSplit which forces the evicted
-/// register to be split or spilled before it can evict anything again. That
-/// guarantees progress.
-RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) {
-  return A.weight > B.weight ? CE_Always : CE_Never;
+/// @param A          The live range to be assigned.
+/// @param IsHint     True when A is about to be assigned to its preferred
+///                   register.
+/// @param B          The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+                           LiveInterval &B, bool BreaksHint) {
+  bool CanSplit = getStage(B) <= RS_Second;
+
+  // Be fairly aggressive about following hints as long as the evictee can be
+  // split.
+  if (CanSplit && IsHint && !BreaksHint)
+    return true;
+
+  return A.weight > B.weight;
 }
 
-/// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted.
-/// Return false if any interference is heavier than MaxWeight.
-/// On return, set MaxWeight to the maximal spill weight of an interference.
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted at a cost lower than MaxCost.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint  True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+///                when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
-                                    float &MaxWeight) {
-  float Weight = 0;
+                                    bool IsHint, EvictionCost &MaxCost) {
+  // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+  // involved in an eviction before. If a cascade number was assigned, deny
+  // evicting anything with the same or a newer cascade number. This prevents
+  // infinite eviction loops.
+  //
+  // This works out so a register without a cascade number is allowed to evict
+  // anything, and it can be evicted by anything.
+  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+  if (!Cascade)
+    Cascade = NextCascade;
+
+  EvictionCost Cost;
   for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
     LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
     // If there is 10 or more interferences, chances are one is heavier.
-    if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
+    if (Q.collectInterferingVRegs(10) >= 10)
       return false;
 
     // Check if any interfering live range is heavier than MaxWeight.
@@ -425,25 +486,69 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
       LiveInterval *Intf = Q.interferingVRegs()[i - 1];
       if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
         return false;
-      if (Intf->weight >= MaxWeight)
-        return false;
-      switch (canEvict(VirtReg, *Intf)) {
-      case CE_Always:
-        break;
-      case CE_Never:
+      // Never evict spill products. They cannot split or spill.
+      if (getStage(*Intf) == RS_Spill)
         return false;
-      case CE_WithSplit:
-        if (getStage(*Intf) > RS_Second)
+      // Once a live range becomes small enough, it is urgent that we find a
+      // register for it. This is indicated by an infinite spill weight. These
+      // urgent live ranges get to evict almost anything.
+      bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+      // Only evict older cascades or live ranges without a cascade.
+      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+      if (Cascade <= IntfCascade) {
+        if (!Urgent)
           return false;
-        break;
+        // We permit breaking cascades for urgent evictions. It should be the
+        // last resort, though, so make it really expensive.
+        Cost.BrokenHints += 10;
       }
-      Weight = std::max(Weight, Intf->weight);
+      // Would this break a satisfied hint?
+      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+      // Update eviction cost.
+      Cost.BrokenHints += BreaksHint;
+      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+      // Abort if this would be too expensive.
+      if (!(Cost < MaxCost))
+        return false;
+      // Finally, apply the eviction policy for non-urgent evictions.
+      if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+        return false;
     }
   }
-  MaxWeight = Weight;
+  MaxCost = Cost;
   return true;
 }
 
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  // Make sure that VirtReg has a cascade number, and assign that cascade
+  // number to every evicted register. These live ranges can then only be
+  // evicted by a newer cascade, preventing infinite loops.
+  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+  if (!Cascade)
+    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+               << " interference: Cascade " << Cascade << '\n');
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i];
+      unassign(*Intf, VRM->getPhys(Intf->reg));
+      assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+              VirtReg.isSpillable() < Intf->isSpillable()) &&
+             "Cannot decrease cascade number, illegal eviction");
+      ExtraRegInfo[Intf->reg].Cascade = Cascade;
+      ++NumEvicted;
+      NewVRegs.push_back(Intf);
+    }
+  }
+}
+
 /// tryEvict - Try to evict all interferences for a physreg.
 /// @param  VirtReg Currently unassigned virtual register.
 /// @param  Order   Physregs to try.
@@ -454,31 +559,37 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                             unsigned CostPerUseLimit) {
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
-  // Keep track of the lightest single interference seen so far.
-  float BestWeight = HUGE_VALF;
+  // Keep track of the cheapest interference seen so far.
+  EvictionCost BestCost(~0u);
   unsigned BestPhys = 0;
 
+  // When we are just looking for a reduced cost per use, don't break any
+  // hints, and only evict smaller spill weights.
+  if (CostPerUseLimit < ~0u) {
+    BestCost.BrokenHints = 0;
+    BestCost.MaxWeight = VirtReg.weight;
+  }
+
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
       continue;
-    // The first use of a register in a function has cost 1.
-    if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
-      continue;
-
-    float Weight = BestWeight;
-    if (!canEvictInterference(VirtReg, PhysReg, Weight))
-      continue;
-
-    // This is an eviction candidate.
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
-                 << Weight << '\n');
-    if (BestPhys && Weight >= BestWeight)
+    // The first use of a callee-saved register in a function has cost 1.
+    // Don't start using a CSR when the CostPerUseLimit is low.
+    if (CostPerUseLimit == 1)
+     if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+       if (!MRI->isPhysRegUsed(CSR)) {
+         DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+                      << PrintReg(CSR, TRI) << '\n');
+         continue;
+       }
+
+    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
       continue;
 
     // Best so far.
     BestPhys = PhysReg;
-    BestWeight = Weight;
+
     // Stop if the hint can be used.
     if (Order.isHint(PhysReg))
       break;
@@ -487,22 +598,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
   if (!BestPhys)
     return 0;
 
-  DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n");
-  for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) {
-    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
-    assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
-    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
-      LiveInterval *Intf = Q.interferingVRegs()[i];
-      unassign(*Intf, VRM->getPhys(Intf->reg));
-      ++NumEvicted;
-      NewVRegs.push_back(Intf);
-      // Prevent looping by forcing the evicted ranges to be split before they
-      // can evict anything else.
-      if (getStage(*Intf) < RS_Second &&
-          canEvict(VirtReg, *Intf) == CE_WithSplit)
-        LRStage[Intf->reg] = RS_Second;
-    }
-  }
+  evictInterference(VirtReg, BestPhys, NewVRegs);
   return BestPhys;
 }
 
@@ -621,8 +717,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
   SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
 }
 
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
-                          InterferenceCache::Cursor Intf) {
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
   // Keep track of through blocks that have not been added to SpillPlacer.
   BitVector Todo = SA->getThroughBlocks();
   SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -633,8 +728,6 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
 
   for (;;) {
     ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
-    if (NewBundles.empty())
-      break;
     // Find new through blocks in the periphery of PrefRegBundles.
     for (int i = 0, e = NewBundles.size(); i != e; ++i) {
       unsigned Bundle = NewBundles[i];
@@ -654,12 +747,12 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
       }
     }
     // Any new blocks to add?
-    if (ActiveBlocks.size() > AddedTo) {
-      ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo],
-                             ActiveBlocks.size() - AddedTo);
-      addThroughConstraints(Intf, Add);
-      AddedTo = ActiveBlocks.size();
-    }
+    if (ActiveBlocks.size() == AddedTo)
+      break;
+    addThroughConstraints(Cand.Intf,
+                          ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo));
+    AddedTo = ActiveBlocks.size();
+
     // Perhaps iterating can enable more bundles?
     SpillPlacer->iterate();
   }
@@ -697,8 +790,7 @@ float RAGreedy::calcSpillCost() {
 /// pattern in LiveBundles. This cost should be added to the local cost of the
 /// interference pattern in SplitConstraints.
 ///
-float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
-                                    InterferenceCache::Cursor Intf) {
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
   float GlobalCost = 0;
   const BitVector &LiveBundles = Cand.LiveBundles;
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
@@ -725,8 +817,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
       continue;
     if (RegIn && RegOut) {
       // We need double spill code if this block has interference.
-      Intf.moveToBlock(Number);
-      if (Intf.hasInterference())
+      Cand.Intf.moveToBlock(Number);
+      if (Cand.Intf.hasInterference())
         GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
       continue;
     }
@@ -756,188 +848,42 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
     dbgs() << ".\n";
   });
 
-  InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg);
+  InterferenceCache::Cursor &Intf = Cand.Intf;
   LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
   SE->reset(LREdit);
 
   // Create the main cross-block interval.
   const unsigned MainIntv = SE->openIntv();
 
-  // First add all defs that are live out of a block.
+  // First handle all the blocks with uses.
   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
-    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
-    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+    bool RegIn  = BI.LiveIn &&
+                  LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+    bool RegOut = BI.LiveOut &&
+                  LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
 
     // Create separate intervals for isolated blocks with multiple uses.
-    if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) {
+    if (!RegIn && !RegOut) {
       DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
-      SE->splitSingleBlock(BI);
-      SE->selectIntv(MainIntv);
-      continue;
-    }
-
-    // Should the register be live out?
-    if (!BI.LiveOut || !RegOut)
-      continue;
-
-    SlotIndex Start, Stop;
-    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
-    Intf.moveToBlock(BI.MBB->getNumber());
-    DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
-                 << Bundles->getBundle(BI.MBB->getNumber(), 1)
-                 << " [" << Start << ';'
-                 << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
-                 << ") intf [" << Intf.first() << ';' << Intf.last() << ')');
-
-    // The interference interval should either be invalid or overlap MBB.
-    assert((!Intf.hasInterference() || Intf.first() < Stop)
-           && "Bad interference");
-    assert((!Intf.hasInterference() || Intf.last() > Start)
-           && "Bad interference");
-
-    // Check interference leaving the block.
-    if (!Intf.hasInterference()) {
-      // Block is interference-free.
-      DEBUG(dbgs() << ", no interference");
-      if (!BI.LiveThrough) {
-        DEBUG(dbgs() << ", not live-through.\n");
-        SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
-        continue;
+      if (!BI.isOneInstr()) {
+        SE->splitSingleBlock(BI);
+        SE->selectIntv(MainIntv);
       }
-      if (!RegIn) {
-        // Block is live-through, but entry bundle is on the stack.
-        // Reload just before the first use.
-        DEBUG(dbgs() << ", not live-in, enter before first use.\n");
-        SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
-        continue;
-      }
-      DEBUG(dbgs() << ", live-through.\n");
       continue;
     }
 
-    // Block has interference.
-    DEBUG(dbgs() << ", interference to " << Intf.last());
-
-    if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) {
-      // The interference doesn't reach the outgoing segment.
-      DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n');
-      SE->useIntv(BI.FirstUse, Stop);
-      continue;
-    }
-
-    SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
-    if (Intf.last().getBoundaryIndex() < BI.LastUse) {
-      // There are interference-free uses at the end of the block.
-      // Find the first use that can get the live-out register.
-      SmallVectorImpl<SlotIndex>::const_iterator UI =
-        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
-                         Intf.last().getBoundaryIndex());
-      assert(UI != SA->UseSlots.end() && "Couldn't find last use");
-      SlotIndex Use = *UI;
-      assert(Use <= BI.LastUse && "Couldn't find last use");
-      // Only attempt a split befroe the last split point.
-      if (Use.getBaseIndex() <= LastSplitPoint) {
-        DEBUG(dbgs() << ", free use at " << Use << ".\n");
-        SlotIndex SegStart = SE->enterIntvBefore(Use);
-        assert(SegStart >= Intf.last() && "Couldn't avoid interference");
-        assert(SegStart < LastSplitPoint && "Impossible split point");
-        SE->useIntv(SegStart, Stop);
-        continue;
-      }
-    }
-
-    // Interference is after the last use.
-    DEBUG(dbgs() << " after last use.\n");
-    SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB);
-    assert(SegStart >= Intf.last() && "Couldn't avoid interference");
-  }
-
-  // Now all defs leading to live bundles are handled, do everything else.
-  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
-    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
-    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
-    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
-
-    // Is the register live-in?
-    if (!BI.LiveIn || !RegIn)
-      continue;
-
-    // We have an incoming register. Check for interference.
-    SlotIndex Start, Stop;
-    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
     Intf.moveToBlock(BI.MBB->getNumber());
-    DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
-                 << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';'
-                 << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
-                 << ')');
 
-    // Check interference entering the block.
-    if (!Intf.hasInterference()) {
-      // Block is interference-free.
-      DEBUG(dbgs() << ", no interference");
-      if (!BI.LiveThrough) {
-        DEBUG(dbgs() << ", killed in block.\n");
-        SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
-        continue;
-      }
-      if (!RegOut) {
-        SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
-        // Block is live-through, but exit bundle is on the stack.
-        // Spill immediately after the last use.
-        if (BI.LastUse < LastSplitPoint) {
-          DEBUG(dbgs() << ", uses, stack-out.\n");
-          SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
-          continue;
-        }
-        // The last use is after the last split point, it is probably an
-        // indirect jump.
-        DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
-                     << LastSplitPoint << ", stack-out.\n");
-        SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint);
-        SE->useIntv(Start, SegEnd);
-        // Run a double interval from the split to the last use.
-        // This makes it possible to spill the complement without affecting the
-        // indirect branch.
-        SE->overlapIntv(SegEnd, BI.LastUse);
-        continue;
-      }
-      // Register is live-through.
-      DEBUG(dbgs() << ", uses, live-through.\n");
-      SE->useIntv(Start, Stop);
-      continue;
-    }
-
-    // Block has interference.
-    DEBUG(dbgs() << ", interference from " << Intf.first());
-
-    if (!BI.LiveThrough && Intf.first() >= BI.LastUse) {
-      // The interference doesn't reach the outgoing segment.
-      DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n');
-      SE->useIntv(Start, BI.LastUse);
-      continue;
-    }
-
-    if (Intf.first().getBaseIndex() > BI.FirstUse) {
-      // There are interference-free uses at the beginning of the block.
-      // Find the last use that can get the register.
-      SmallVectorImpl<SlotIndex>::const_iterator UI =
-        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
-                         Intf.first().getBaseIndex());
-      assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
-      SlotIndex Use = (--UI)->getBoundaryIndex();
-      DEBUG(dbgs() << ", free use at " << *UI << ".\n");
-      SlotIndex SegEnd = SE->leaveIntvAfter(Use);
-      assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
-      SE->useIntv(Start, SegEnd);
-      continue;
-    }
-
-    // Interference is before the first use.
-    DEBUG(dbgs() << " before first use.\n");
-    SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB);
-    assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
+    if (RegIn && RegOut)
+      SE->splitLiveThroughBlock(BI.MBB->getNumber(),
+                                MainIntv, Intf.first(),
+                                MainIntv, Intf.last());
+    else if (RegIn)
+      SE->splitRegInBlock(BI, MainIntv, Intf.first());
+    else
+      SE->splitRegOutBlock(BI, MainIntv, Intf.last());
   }
 
   // Handle live-through blocks.
@@ -945,20 +891,11 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
     unsigned Number = Cand.ActiveBlocks[i];
     bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
     bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
-    DEBUG(dbgs() << "Live through BB#" << Number << '\n');
-    if (RegIn && RegOut) {
-      Intf.moveToBlock(Number);
-      if (!Intf.hasInterference()) {
-        SE->useIntv(Indexes->getMBBStartIdx(Number),
-                    Indexes->getMBBEndIdx(Number));
-        continue;
-      }
-    }
-    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
-    if (RegIn)
-      SE->leaveIntvAtTop(*MBB);
-    if (RegOut)
-      SE->enterIntvAtEnd(*MBB);
+    if (!RegIn && !RegOut)
+      continue;
+    Intf.moveToBlock(Number);
+    SE->splitLiveThroughBlock(Number, RegIn  ? MainIntv : 0, Intf.first(),
+                                      RegOut ? MainIntv : 0, Intf.last());
   }
 
   ++NumGlobalSplits;
@@ -967,7 +904,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
   SE->finish(&IntvMap);
   DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
 
-  LRStage.resize(MRI->getNumVirtRegs());
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
   unsigned OrigBlocks = SA->getNumLiveBlocks();
 
   // Sort out the new intervals created by splitting. We get four kinds:
@@ -976,27 +913,27 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
   // - Block-local splits are candidates for local splitting.
   // - DCE leftovers should go back on the queue.
   for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
-    unsigned Reg = LREdit.get(i)->reg;
+    LiveInterval &Reg = *LREdit.get(i);
 
     // Ignore old intervals from DCE.
-    if (LRStage[Reg] != RS_New)
+    if (getStage(Reg) != RS_New)
       continue;
 
     // Remainder interval. Don't try splitting again, spill if it doesn't
     // allocate.
     if (IntvMap[i] == 0) {
-      LRStage[Reg] = RS_Global;
+      setStage(Reg, RS_Global);
       continue;
     }
 
     // Main interval. Allow repeated splitting as long as the number of live
     // blocks is strictly decreasing.
     if (IntvMap[i] == MainIntv) {
-      if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
+      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
         DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                      << " blocks as original.\n");
         // Don't allow repeated splitting as a safe guard against looping.
-        LRStage[Reg] = RS_Global;
+        setStage(Reg, RS_Global);
       }
       continue;
     }
@@ -1015,17 +952,34 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
   const unsigned NoCand = ~0u;
   unsigned BestCand = NoCand;
+  unsigned NumCands = 0;
 
   Order.rewind();
-  for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
-    if (GlobalCand.size() <= Cand)
-      GlobalCand.resize(Cand+1);
-    GlobalCand[Cand].reset(PhysReg);
+  while (unsigned PhysReg = Order.next()) {
+    // Discard bad candidates before we run out of interference cache cursors.
+    // This will only affect register classes with a lot of registers (>32).
+    if (NumCands == IntfCache.getMaxCursors()) {
+      unsigned WorstCount = ~0u;
+      unsigned Worst = 0;
+      for (unsigned i = 0; i != NumCands; ++i) {
+        if (i == BestCand)
+          continue;
+        unsigned Count = GlobalCand[i].LiveBundles.count();
+        if (Count < WorstCount)
+          Worst = i, WorstCount = Count;
+      }
+      --NumCands;
+      GlobalCand[Worst] = GlobalCand[NumCands];
+    }
+
+    if (GlobalCand.size() <= NumCands)
+      GlobalCand.resize(NumCands+1);
+    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+    Cand.reset(IntfCache, PhysReg);
 
-    SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
+    SpillPlacer->prepare(Cand.LiveBundles);
     float Cost;
-    InterferenceCache::Cursor Intf(IntfCache, PhysReg);
-    if (!addSplitConstraints(Intf, Cost)) {
+    if (!addSplitConstraints(Cand.Intf, Cost)) {
       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
     }
@@ -1040,28 +994,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       });
       continue;
     }
-    growRegion(GlobalCand[Cand], Intf);
+    growRegion(Cand);
 
     SpillPlacer->finish();
 
     // No live bundles, defer to splitSingleBlocks().
-    if (!GlobalCand[Cand].LiveBundles.any()) {
+    if (!Cand.LiveBundles.any()) {
       DEBUG(dbgs() << " no bundles.\n");
       continue;
     }
 
-    Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf);
+    Cost += calcGlobalSplitCost(Cand);
     DEBUG({
       dbgs() << ", total = " << Cost << " with bundles";
-      for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0;
-           i = GlobalCand[Cand].LiveBundles.find_next(i))
+      for (int i = Cand.LiveBundles.find_first(); i>=0;
+           i = Cand.LiveBundles.find_next(i))
         dbgs() << " EB#" << i;
       dbgs() << ".\n";
     });
     if (Cost < BestCost) {
-      BestCand = Cand;
+      BestCand = NumCands;
       BestCost = Hysteresis * Cost; // Prevent rounding effects.
     }
+    ++NumCands;
   }
 
   if (BestCand == NoCand)
@@ -1302,10 +1257,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   if (NewGaps >= NumGaps) {
     DEBUG(dbgs() << "Tagging non-progress ranges: ");
     assert(!ProgressRequired && "Didn't make progress when it was required.");
-    LRStage.resize(MRI->getNumVirtRegs());
     for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
       if (IntvMap[i] == 1) {
-        LRStage[LREdit.get(i)->reg] = RS_Local;
+        setStage(*LREdit.get(i), RS_Local);
         DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
       }
     DEBUG(dbgs() << '\n');
@@ -1384,7 +1338,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
     return PhysReg;
 
   LiveRangeStage Stage = getStage(VirtReg);
-  DEBUG(dbgs() << StageName[Stage] << '\n');
+  DEBUG(dbgs() << StageName[Stage]
+               << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
 
   // Try to evict a less worthy live range, but only for ranges from the primary
   // queue. The RS_Second ranges already failed to do this, and they should not
@@ -1399,7 +1354,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
   // Wait until the second time, when all smaller ranges have been allocated.
   // This gives a better picture of the interference to split around.
   if (Stage == RS_First) {
-    LRStage[VirtReg.reg] = RS_Second;
+    setStage(VirtReg, RS_Second);
     DEBUG(dbgs() << "wait for second round\n");
     NewVRegs.push_back(&VirtReg);
     return 0;
@@ -1407,7 +1362,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
 
   // If we couldn't allocate a register from spilling, there is probably some
   // invalid inline assembly. The base class wil report it.
-  if (Stage >= RS_Spill)
+  if (Stage >= RS_Spill || !VirtReg.isSpillable())
     return ~0u;
 
   // Try splitting VirtReg or interferences.
@@ -1443,15 +1398,15 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   DomTree = &getAnalysis<MachineDominatorTree>();
   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
   Loops = &getAnalysis<MachineLoopInfo>();
-  LoopRanges = &getAnalysis<MachineLoopRanges>();
   Bundles = &getAnalysis<EdgeBundles>();
   SpillPlacer = &getAnalysis<SpillPlacement>();
   DebugVars = &getAnalysis<LiveDebugVariables>();
 
   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
   SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
-  LRStage.clear();
-  LRStage.resize(MRI->getNumVirtRegs());
+  ExtraRegInfo.clear();
+  ExtraRegInfo.resize(MRI->getNumVirtRegs());
+  NextCascade = 1;
   IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
 
   allocatePhysRegs();
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5ef88cb74ba5..0dd3c598c154 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -16,7 +16,9 @@
 #include "LiveRangeEdit.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
+#include "RegisterClassInfo.h"
 #include "Spiller.h"
+#include "RegisterCoalescer.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
@@ -27,7 +29,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -57,11 +58,6 @@ NewHeuristic("new-spilling-heuristic",
              cl::init(false), cl::Hidden);
 
 static cl::opt<bool>
-PreSplitIntervals("pre-alloc-split",
-                  cl::desc("Pre-register allocation live interval splitting"),
-                  cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
 TrivCoalesceEnds("trivial-coalesce-ends",
                   cl::desc("Attempt trivial coalescing of interval ends"),
                   cl::init(false), cl::Hidden);
@@ -100,10 +96,9 @@ namespace {
       initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
       initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
       initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
-      initializeRegisterCoalescerAnalysisGroup(
+      initializeRegisterCoalescerPass(
         *PassRegistry::getPassRegistry());
       initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
-      initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
       initializeLiveStacksPass(*PassRegistry::getPassRegistry());
       initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
       initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -148,6 +143,7 @@ namespace {
     BitVector reservedRegs_;
     LiveIntervals* li_;
     MachineLoopInfo *loopInfo;
+    RegisterClassInfo RegClassInfo;
 
     /// handled_ - Intervals are added to the handled_ set in the order of their
     /// start value.  This is uses for backtracking.
@@ -215,8 +211,6 @@ namespace {
       // to coalescing and which analyses coalescing invalidates.
       AU.addRequiredTransitive<RegisterCoalescer>();
       AU.addRequired<CalculateSpillWeights>();
-      if (PreSplitIntervals)
-        AU.addRequiredID(PreAllocSplittingID);
       AU.addRequiredID(LiveStacksID);
       AU.addPreservedID(LiveStacksID);
       AU.addRequired<MachineLoopInfo>();
@@ -366,13 +360,10 @@ namespace {
     /// getFirstNonReservedPhysReg - return the first non-reserved physical
     /// register in the register class.
     unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
-        TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_);
-        TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_);
-        while (i != aoe && reservedRegs_.test(*i))
-          ++i;
-        assert(i != aoe && "All registers reserved?!");
-        return *i;
-      }
+      ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
+      assert(!O.empty() && "All registers reserved?!");
+      return O.front();
+    }
 
     void ComputeRelatedRegClasses();
 
@@ -402,11 +393,10 @@ INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
 INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
-INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting)
 INITIALIZE_PASS_DEPENDENCY(LiveStacks)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
                     "Linear Scan Register Allocator", false, false)
@@ -524,6 +514,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
   reservedRegs_ = tri_->getReservedRegs(fn);
   li_ = &getAnalysis<LiveIntervals>();
   loopInfo = &getAnalysis<MachineLoopInfo>();
+  RegClassInfo.runOnMachineFunction(fn);
 
   // We don't run the coalescer here because we have no reason to
   // interact with it.  If the coalescer requires interaction, it
@@ -1166,14 +1157,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
 
   bool Found = false;
   std::vector<std::pair<unsigned,float> > RegsWeights;
+  ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
   if (!minReg || SpillWeights[minReg] == HUGE_VALF)
-    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
-           e = RC->allocation_order_end(*mf_); i != e; ++i) {
-      unsigned reg = *i;
+    for (unsigned i = 0; i != Order.size(); ++i) {
+      unsigned reg = Order[i];
       float regWeight = SpillWeights[reg];
-      // Don't even consider reserved regs.
-      if (reservedRegs_.test(reg))
-        continue;
       // Skip recently allocated registers and reserved registers.
       if (minWeight > regWeight && !isRecentlyUsed(reg))
         Found = true;
@@ -1182,11 +1170,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
 
   // If we didn't find a register that is spillable, try aliases?
   if (!Found) {
-    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
-           e = RC->allocation_order_end(*mf_); i != e; ++i) {
-      unsigned reg = *i;
-      if (reservedRegs_.test(reg))
-        continue;
+    for (unsigned i = 0; i != Order.size(); ++i) {
+      unsigned reg = Order[i];
       // No need to worry about if the alias register size < regsize of RC.
       // We are going to spill all registers that alias it anyway.
       for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
@@ -1446,13 +1431,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
   if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
     physReg = vrm_->getPhys(physReg);
 
-  TargetRegisterClass::iterator I, E;
-  tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
-  assert(I != E && "No allocatable register in this register class!");
+  ArrayRef<unsigned> Order;
+  if (Hint.first)
+    Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
+  else
+    Order = RegClassInfo.getOrder(RC);
+
+  assert(!Order.empty() && "No allocatable register in this register class!");
 
   // Scan for the first available register.
-  for (; I != E; ++I) {
-    unsigned Reg = *I;
+  for (unsigned i = 0; i != Order.size(); ++i) {
+    unsigned Reg = Order[i];
     // Ignore "downgraded" registers.
     if (SkipDGRegs && DowngradedRegs.count(Reg))
       continue;
@@ -1482,8 +1471,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
   // inactive count.  Alkis found that this reduced register pressure very
   // slightly on X86 (in rev 1.94 of this file), though this should probably be
   // reevaluated now.
-  for (; I != E; ++I) {
-    unsigned Reg = *I;
+  for (unsigned i = 0; i != Order.size(); ++i) {
+    unsigned Reg = Order[i];
     // Ignore "downgraded" registers.
     if (SkipDGRegs && DowngradedRegs.count(Reg))
       continue;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 1e1f1e0d3470..72230d4b0c5c 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -35,6 +35,7 @@
 #include "Splitter.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
+#include "RegisterCoalescer.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -46,7 +47,6 @@
 #include "llvm/CodeGen/PBQP/Graph.h"
 #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -84,11 +84,11 @@ public:
   static char ID;
 
   /// Construct a PBQP register allocator.
-  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
-      : MachineFunctionPass(ID), builder(b) {
+  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+      : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
-    initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+    initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
     initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
     initializeLiveStacksPass(*PassRegistry::getPassRegistry());
     initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -122,6 +122,8 @@ private:
 
   std::auto_ptr<PBQPBuilder> builder;
 
+  char *customPassID;
+
   MachineFunction *mf;
   const TargetMachine *tm;
   const TargetRegisterInfo *tri;
@@ -222,10 +224,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
     // Compute an initial allowed set for the current vreg.
     typedef std::vector<unsigned> VRAllowed;
     VRAllowed vrAllowed;
-    for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf),
-                                       aoEnd = trc->allocation_order_end(*mf);
-         aoItr != aoEnd; ++aoItr) {
-      unsigned preg = *aoItr;
+    ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+    for (unsigned i = 0; i != rawOrder.size(); ++i) {
+      unsigned preg = rawOrder[i];
       if (!reservedRegs.test(preg)) {
         vrAllowed.push_back(preg);
       }
@@ -450,6 +451,8 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
   au.addRequired<LiveIntervals>();
   //au.addRequiredID(SplitCriticalEdgesID);
   au.addRequired<RegisterCoalescer>();
+  if (customPassID)
+    au.addRequiredID(*customPassID);
   au.addRequired<CalculateSpillWeights>();
   au.addRequired<LiveStacks>();
   au.addPreserved<LiveStacks>();
@@ -581,7 +584,7 @@ void RegAllocPBQP::finalizeAlloc() const {
 
     if (physReg == 0) {
       const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
-      physReg = *liRC->allocation_order_begin(*mf);
+      physReg = liRC->getRawAllocationOrder(*mf).front();
     }
 
     vrm->assignVirt2Phys(li->reg, physReg);
@@ -703,8 +706,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 }
 
 FunctionPass* llvm::createPBQPRegisterAllocator(
-                                           std::auto_ptr<PBQPBuilder> builder) {
-  return new RegAllocPBQP(builder);
+                                           std::auto_ptr<PBQPBuilder> builder,
+                                           char *customPassID) {
+  return new RegAllocPBQP(builder, customPassID);
 }
 
 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 75b0c90be8fc..5a77e47bc591 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -81,11 +81,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
 
   // FIXME: Once targets reserve registers instead of removing them from the
   // allocation order, we can simply use begin/end here.
-  TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF);
-  TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF);
-
-  for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
-    unsigned PhysReg = *I;
+  ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+  for (unsigned i = 0; i != RawOrder.size(); ++i) {
+    unsigned PhysReg = RawOrder[i];
     // Remove reserved registers from the allocation order.
     if (Reserved.test(PhysReg))
       continue;
@@ -103,7 +101,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
 
   DEBUG({
     dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
-    for (unsigned I = 0; I != N; ++I)
+    for (unsigned I = 0; I != RCI.NumRegs; ++I)
       dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
     dbgs() << " ]\n";
   });
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
index 6f7d9c94969c..d21fd67efe8b 100644
--- a/lib/CodeGen/RegisterClassInfo.h
+++ b/lib/CodeGen/RegisterClassInfo.h
@@ -112,7 +112,7 @@ public:
   /// register, so a register allocator needs to track its liveness and
   /// availability.
   bool isAllocatable(unsigned PhysReg) const {
-    return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg);
+    return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
   }
 };
 } // end namespace llvm
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 407559a211a0..b91f92c6aa5a 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,38 +13,92 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/RegisterCoalescer.h"
+#define DEBUG_TYPE "regcoalescing"
+#include "RegisterCoalescer.h"
+#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
+
+#include "llvm/Pass.h"
+#include "llvm/Value.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Pass.h"
-
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
 using namespace llvm;
 
-// Register the RegisterCoalescer interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", 
-                          SimpleRegisterCoalescing)
-char RegisterCoalescer::ID = 0;
+STATISTIC(numJoins    , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends  , "Number of copies extended");
+STATISTIC(NumReMats   , "Number of instructions re-materialized");
+STATISTIC(numPeep     , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts   , "Number of times interval joining aborted");
 
-// RegisterCoalescer destructor: DO NOT move this to the header file
-// for RegisterCoalescer or else clients of the RegisterCoalescer
-// class may not depend on the RegisterCoalescer.o file in the current
-// .a file, causing alias analysis support to not be included in the
-// tool correctly!
-//
-RegisterCoalescer::~RegisterCoalescer() {}
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+              cl::desc("Coalesce copies (default=true)"),
+              cl::init(true));
+
+static cl::opt<bool>
+DisableCrossClassJoin("disable-cross-class-join",
+               cl::desc("Avoid coalescing cross register class copies"),
+               cl::init(false), cl::Hidden);
 
-unsigned CoalescerPair::compose(unsigned a, unsigned b) const {
+static cl::opt<bool>
+EnablePhysicalJoin("join-physregs",
+                   cl::desc("Join physical register copies"),
+                   cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+         cl::desc("Verify machine instrs before and after register coalescing"),
+         cl::Hidden);
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+                      "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+                    "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
   if (!a) return b;
   if (!b) return a;
-  return tri_.composeSubRegIndices(a, b);
+  return tri.composeSubRegIndices(a, b);
 }
 
-bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
-                                unsigned &Src, unsigned &Dst,
-                                unsigned &SrcSub, unsigned &DstSub) const {
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+                        unsigned &Src, unsigned &Dst,
+                        unsigned &SrcSub, unsigned &DstSub) {
   if (MI->isCopy()) {
     Dst = MI->getOperand(0).getReg();
     DstSub = MI->getOperand(0).getSubReg();
@@ -52,7 +106,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
     SrcSub = MI->getOperand(1).getSubReg();
   } else if (MI->isSubregToReg()) {
     Dst = MI->getOperand(0).getReg();
-    DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
+    DstSub = compose(tri, MI->getOperand(0).getSubReg(),
+                     MI->getOperand(3).getImm());
     Src = MI->getOperand(2).getReg();
     SrcSub = MI->getOperand(2).getSubReg();
   } else
@@ -66,7 +121,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
   flipped_ = crossClass_ = false;
 
   unsigned Src, Dst, SrcSub, DstSub;
-  if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+  if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
     return false;
   partial_ = SrcSub || DstSub;
 
@@ -156,7 +211,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
   if (!MI)
     return false;
   unsigned Src, Dst, SrcSub, DstSub;
-  if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+  if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
     return false;
 
   // Find the virtual register that is srcReg_.
@@ -185,13 +240,1558 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
     if (dstReg_ != Dst)
       return false;
     // Registers match, do the subregisters line up?
-    return compose(subIdx_, SrcSub) == DstSub;
+    return compose(tri_, subIdx_, SrcSub) == DstSub;
+  }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
+  AU.addRequired<LiveDebugVariables>();
+  AU.addPreserved<LiveDebugVariables>();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreservedID(MachineDominatorsID);
+  AU.addPreservedID(StrongPHIEliminationID);
+  AU.addPreservedID(PHIEliminationID);
+  AU.addPreservedID(TwoAddressInstructionPassID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
+  /// Joined copies are not deleted immediately, but kept in JoinedCopies.
+  JoinedCopies.insert(CopyMI);
+
+  /// Mark all register operands of CopyMI as <undef> so they won't affect dead
+  /// code elimination.
+  for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
+       E = CopyMI->operands_end(); I != E; ++I)
+    if (I->isReg())
+      I->setIsUndef(true);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB.  If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy.  For example:
+///
+///  A3 = B0
+///    ...
+///  B1 = A3      <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
+                                                    MachineInstr *CopyMI) {
+  // Bail if there is no dst interval - can happen when merging physical subreg
+  // operations.
+  if (!li_->hasInterval(CP.getDstReg()))
+    return false;
+
+  LiveInterval &IntA =
+    li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+    li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
+  // the example above.
+  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+  if (BLR == IntB.end()) return false;
+  VNInfo *BValNo = BLR->valno;
+
+  // Get the location that B is defined at.  Two options: either this value has
+  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
+  // can't process it.
+  if (!BValNo->isDefByCopy()) return false;
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+  // The live range might not exist after fun with physreg coalescing.
+  if (ALR == IntA.end()) return false;
+  VNInfo *AValNo = ALR->valno;
+  // If it's re-defined by an early clobber somewhere in the live range, then
+  // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+  // See PR3149:
+  // 172     %ECX<def> = MOV32rr %reg1039<kill>
+  // 180     INLINEASM <es:subl $5,$1
+  //         sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+  //         %EAX<kill>,
+  // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+  // 188     %EAX<def> = MOV32rr %EAX<kill>
+  // 196     %ECX<def> = MOV32rr %ECX<kill>
+  // 204     %ECX<def> = MOV32rr %ECX<kill>
+  // 212     %EAX<def> = MOV32rr %EAX<kill>
+  // 220     %EAX<def> = MOV32rr %EAX
+  // 228     %reg1039<def> = MOV32rr %ECX<kill>
+  // The early clobber operand ties ECX input to the ECX def.
+  //
+  // The live interval of ECX is represented as this:
+  // %reg20,inf = [46,47:1)[174,230:0)  0@174-(230) 1@46-(47)
+  // The coalescer has no idea there was a def in the middle of [174,230].
+  if (AValNo->hasRedefByEC())
+    return false;
+
+  // If AValNo is defined as a copy from IntB, we can potentially process this.
+  // Get the instruction that defines this value number.
+  if (!CP.isCoalescable(AValNo->getCopy()))
+    return false;
+
+  // Get the LiveRange in IntB that this value number starts with.
+  LiveInterval::iterator ValLR =
+    IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+  if (ValLR == IntB.end())
+    return false;
+
+  // Make sure that the end of the live range is inside the same block as
+  // CopyMI.
+  MachineInstr *ValLREndInst =
+    li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
+  if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+    return false;
+
+  // Okay, we now know that ValLR ends in the same block that the CopyMI
+  // live-range starts.  If there are no intervening live ranges between them in
+  // IntB, we can merge them.
+  if (ValLR+1 != BLR) return false;
+
+  // If a live interval is a physical register, conservatively check if any
+  // of its aliases is overlapping the live interval of the virtual register.
+  // If so, do not coalesce.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+    for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+      if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
+        DEBUG({
+            dbgs() << "\t\tInterfere with alias ";
+            li_->getInterval(*AS).print(dbgs(), tri_);
+          });
+        return false;
+      }
+  }
+
+  DEBUG({
+      dbgs() << "Extending: ";
+      IntB.print(dbgs(), tri_);
+    });
+
+  SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+  // We are about to delete CopyMI, so need to remove it as the 'instruction
+  // that defines this value #'. Update the valnum with the new defining
+  // instruction #.
+  BValNo->def  = FillerStart;
+  BValNo->setCopy(0);
+
+  // Okay, we can merge them.  We need to insert a new liverange:
+  // [ValLR.end, BLR.begin) of either value number, then we merge the
+  // two value numbers.
+  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+  // If the IntB live range is assigned to a physical register, and if that
+  // physreg has sub-registers, update their live intervals as well.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+    for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+      if (!li_->hasInterval(*SR))
+        continue;
+      LiveInterval &SRLI = li_->getInterval(*SR);
+      SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+                              SRLI.getNextValue(FillerStart, 0,
+                                                li_->getVNInfoAllocator())));
+    }
+  }
+
+  // Okay, merge "B1" into the same value number as "B0".
+  if (BValNo != ValLR->valno) {
+    // If B1 is killed by a PHI, then the merged live range must also be killed
+    // by the same PHI, as B0 and B1 can not overlap.
+    bool HasPHIKill = BValNo->hasPHIKill();
+    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+    if (HasPHIKill)
+      ValLR->valno->setHasPHIKill(true);
+  }
+  DEBUG({
+      dbgs() << "   result = ";
+      IntB.print(dbgs(), tri_);
+      dbgs() << "\n";
+    });
+
+  // If the source instruction was killing the source register before the
+  // merge, unset the isKill marker given the live range has been extended.
+  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+  if (UIdx != -1) {
+    ValLREndInst->getOperand(UIdx).setIsKill(false);
+  }
+
+  // If the copy instruction was killing the destination register before the
+  // merge, find the last use and trim the live range. That will also add the
+  // isKill marker.
+  if (ALR->end == CopyIdx)
+    li_->shrinkToUses(&IntA);
+
+  ++numExtends;
+  return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValNo val# of IntA.
+bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
+                                                    LiveInterval &IntB,
+                                                    VNInfo *AValNo,
+                                                    VNInfo *BValNo) {
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    LiveInterval::Ranges::iterator BI =
+      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+    if (BI != IntB.ranges.begin())
+      --BI;
+    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+      if (BI->valno == BValNo)
+        continue;
+      if (BI->start <= AI->start && BI->end > AI->start)
+        return true;
+      if (BI->start > AI->start && BI->start < AI->end)
+        return true;
+    }
   }
+  return false;
 }
 
-// Because of the way .a files work, we must force the SimpleRC
-// implementation to be pulled in if the RegisterCoalescer classes are
-// pulled in.  Otherwise we run the risk of RegisterCoalescer being
-// used, but the default implementation not being linked into the tool
-// that uses it.
-DEFINING_FILE_FOR(RegisterCoalescer)
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB.  If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+///  A3 = op A2 B0<kill>
+///    ...
+///  B1 = A3      <- this copy
+///    ...
+///     = op A3   <- more uses
+///
+/// ==>
+///
+///  B2 = op B0 A2<kill>
+///    ...
+///  B1 = B2      <- now an identity copy
+///    ...
+///     = op B2   <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+                                                        MachineInstr *CopyMI) {
+  // FIXME: For now, only eliminate the copy by commuting its def when the
+  // source register is a virtual register. We want to guard against cases
+  // where the copy is a back edge copy and commuting the def lengthens the
+  // live interval of the source register to the entire loop.
+  if (CP.isPhys() && CP.isFlipped())
+    return false;
+
+  // Bail if there is no dst interval.
+  if (!li_->hasInterval(CP.getDstReg()))
+    return false;
+
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+  LiveInterval &IntA =
+    li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+    li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+  if (!BValNo || !BValNo->isDefByCopy())
+    return false;
+
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+  assert(AValNo && "COPY source not live");
+
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+  if (!DefMI)
+    return false;
+  const MCInstrDesc &MCID = DefMI->getDesc();
+  if (!MCID.isCommutable())
+    return false;
+  // If DefMI is a two-address instruction then commuting it will change the
+  // destination register.
+  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+  assert(DefIdx != -1);
+  unsigned UseOpIdx;
+  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+    return false;
+  unsigned Op1, Op2, NewDstIdx;
+  if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+    return false;
+  if (Op1 == UseOpIdx)
+    NewDstIdx = Op2;
+  else if (Op2 == UseOpIdx)
+    NewDstIdx = Op1;
+  else
+    return false;
+
+  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+  unsigned NewReg = NewDstMO.getReg();
+  if (NewReg != IntB.reg || !NewDstMO.isKill())
+    return false;
+
+  // Make sure there are no other definitions of IntB that would reach the
+  // uses which the new definition can reach.
+  if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+    return false;
+
+  // Abort if the aliases of IntB.reg have values that are not simply the
+  // clobbers from the superreg.
+  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+    for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+      if (li_->hasInterval(*AS) &&
+          HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
+        return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+  // It's not possible to determine whether it's safe to perform the coalescing.
+  for (MachineRegisterInfo::use_nodbg_iterator UI = 
+         mri_->use_nodbg_begin(IntA.reg), 
+       UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end())
+      continue;
+    if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+      return false;
+  }
+
+  DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+               << *DefMI);
+
+  // At this point we have decided that it is legal to do this
+  // transformation.  Start by commuting the instruction.
+  MachineBasicBlock *MBB = DefMI->getParent();
+  MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+  if (!NewMI)
+    return false;
+  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+      !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
+    return false;
+  if (NewMI != DefMI) {
+    li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+    MBB->insert(DefMI, NewMI);
+    MBB->erase(DefMI);
+  }
+  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+  NewMI->getOperand(OpIdx).setIsKill();
+
+  // If ALR and BLR overlap and end of BLR extends beyond end of ALR, e.g.
+  // A = or A, B
+  // ...
+  // B = A
+  // ...
+  // C = A<kill>
+  // ...
+  //   = B
+
+  // Update uses of IntA of the specific Val# with IntB.
+  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+         UE = mri_->use_end(); UI != UE;) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = &*UI;
+    ++UI;
+    if (JoinedCopies.count(UseMI))
+      continue;
+    if (UseMI->isDebugValue()) {
+      // FIXME These don't have an instruction index.  Not clear we have enough
+      // info to decide whether to do this replacement or not.  For now do it.
+      UseMO.setReg(NewReg);
+      continue;
+    }
+    SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+    if (ULR == IntA.end() || ULR->valno != AValNo)
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+      UseMO.substPhysReg(NewReg, *tri_);
+    else
+      UseMO.setReg(NewReg);
+    if (UseMI == CopyMI)
+      continue;
+    if (!UseMI->isCopy())
+      continue;
+    if (UseMI->getOperand(0).getReg() != IntB.reg ||
+        UseMI->getOperand(0).getSubReg())
+      continue;
+
+    // This copy will become a noop. If it's defining a new val#, merge it into
+    // BValNo.
+    SlotIndex DefIdx = UseIdx.getDefIndex();
+    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+    if (!DVNI)
+      continue;
+    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+    assert(DVNI->def == DefIdx);
+    BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+    markAsJoined(UseMI);
+  }
+
+  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+  // is updated.
+  VNInfo *ValNo = BValNo;
+  ValNo->def = AValNo->def;
+  ValNo->setCopy(0);
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+  }
+  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+  IntA.removeValNo(AValNo);
+  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
+  ++numCommutes;
+  return true;
+}
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+                                                       bool preserveSrcInt,
+                                                       unsigned DstReg,
+                                                       unsigned DstSubIdx,
+                                                       MachineInstr *CopyMI) {
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+  assert(SrcLR != SrcInt.end() && "Live range not found!");
+  VNInfo *ValNo = SrcLR->valno;
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+  if (!DefMI)
+    return false;
+  assert(DefMI && "Defining instruction disappeared");
+  const MCInstrDesc &MCID = DefMI->getDesc();
+  if (!MCID.isAsCheapAsAMove())
+    return false;
+  if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+    return false;
+  bool SawStore = false;
+  if (!DefMI->isSafeToMove(tii_, AA, SawStore))
+    return false;
+  if (MCID.getNumDefs() != 1)
+    return false;
+  if (!DefMI->isImplicitDef()) {
+    // Make sure the copy destination register class fits the instruction
+    // definition register class. The mismatch can happen as a result of earlier
+    // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+    const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_);
+    if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+      if (mri_->getRegClass(DstReg) != RC)
+        return false;
+    } else if (!RC->contains(DstReg))
+      return false;
+  }
+
+  // If destination register has a sub-register index on it, make sure it
+  // matches the instruction register class.
+  if (DstSubIdx) {
+    const MCInstrDesc &MCID = DefMI->getDesc();
+    if (MCID.getNumDefs() != 1)
+      return false;
+    const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+    const TargetRegisterClass *DstSubRC =
+      DstRC->getSubRegisterRegClass(DstSubIdx);
+    const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_);
+    if (DefRC == DstRC)
+      DstSubIdx = 0;
+    else if (DefRC != DstSubRC)
+      return false;
+  }
+
+  RemoveCopyFlag(DstReg, CopyMI);
+
+  MachineBasicBlock *MBB = CopyMI->getParent();
+  MachineBasicBlock::iterator MII =
+    llvm::next(MachineBasicBlock::iterator(CopyMI));
+  tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
+  MachineInstr *NewMI = prior(MII);
+
+  // CopyMI may have implicit operands, transfer them over to the newly
+  // rematerialized instruction. And update implicit def interval valnos.
+  for (unsigned i = CopyMI->getDesc().getNumOperands(),
+         e = CopyMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = CopyMI->getOperand(i);
+    if (MO.isReg() && MO.isImplicit())
+      NewMI->addOperand(MO);
+    if (MO.isDef())
+      RemoveCopyFlag(MO.getReg(), CopyMI);
+  }
+
+  NewMI->copyImplicitOps(CopyMI);
+  li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+  CopyMI->eraseFromParent();
+  ReMatCopies.insert(CopyMI);
+  ReMatDefs.insert(DefMI);
+  DEBUG(dbgs() << "Remat: " << *NewMI);
+  ++NumReMats;
+
+  // The source interval can become smaller because we removed a use.
+  if (preserveSrcInt)
+    li_->shrinkToUses(&SrcInt);
+
+  return true;
+}
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
+  bool DstIsPhys = CP.isPhys();
+  unsigned SrcReg = CP.getSrcReg();
+  unsigned DstReg = CP.getDstReg();
+  unsigned SubIdx = CP.getSubIdx();
+
+  // Update LiveDebugVariables.
+  ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+       MachineInstr *UseMI = I.skipInstruction();) {
+    // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+    // instead.
+    if (DstIsPhys) {
+      if (UseMI->isCopy() &&
+          !UseMI->getOperand(1).getSubReg() &&
+          !UseMI->getOperand(0).getSubReg() &&
+          UseMI->getOperand(1).getReg() == SrcReg &&
+          UseMI->getOperand(0).getReg() != SrcReg &&
+          UseMI->getOperand(0).getReg() != DstReg &&
+          !JoinedCopies.count(UseMI) &&
+          ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
+                                  UseMI->getOperand(0).getReg(), 0, UseMI))
+        continue;
+    }
+
+    SmallVector<unsigned,8> Ops;
+    bool Reads, Writes;
+    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+    bool Kills = false, Deads = false;
+
+    // Replace SrcReg with DstReg in all UseMI operands.
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+      MachineOperand &MO = UseMI->getOperand(Ops[i]);
+      Kills |= MO.isKill();
+      Deads |= MO.isDead();
+
+      if (DstIsPhys)
+        MO.substPhysReg(DstReg, *tri_);
+      else
+        MO.substVirtReg(DstReg, SubIdx, *tri_);
+    }
+
+    // This instruction is a copy that will be removed.
+    if (JoinedCopies.count(UseMI))
+      continue;
+
+    if (SubIdx) {
+      // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+      // read-modify-write of DstReg.
+      if (Deads)
+        UseMI->addRegisterDead(DstReg, tri_);
+      else if (!Reads && Writes)
+        UseMI->addRegisterDefined(DstReg, tri_);
+
+      // Kill flags apply to the whole physical register.
+      if (DstIsPhys && Kills)
+        UseMI->addRegisterKilled(DstReg, tri_);
+    }
+
+    DEBUG({
+        dbgs() << "\t\tupdated: ";
+        if (!UseMI->isDebugValue())
+          dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+        dbgs() << *UseMI;
+      });
+  }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty, if so remove it and also remove the empty intervals of its
+/// sub-registers. Return true if live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+                                  const TargetRegisterInfo *tri_) {
+  if (li.empty()) {
+    if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+      for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+        if (!li_->hasInterval(*SR))
+          continue;
+        LiveInterval &sli = li_->getInterval(*SR);
+        if (sli.empty())
+          li_->removeInterval(*SR);
+      }
+    li_->removeInterval(li.reg);
+    return true;
+  }
+  return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
+                                             MachineInstr *DefMI) {
+  SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+  LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+  if (DefIdx != MLR->valno->def)
+    return false;
+  li.removeValNo(MLR->valno);
+  return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+/// RemoveCopyFlag - Reset (via setCopy(0)) the value that CopyMI defines in
+/// the interval of DstReg and, if DstReg is a physreg, in each of its aliases.
+void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
+                                       const MachineInstr *CopyMI) {
+  SlotIndex CopyDef = li_->getInstructionIndex(CopyMI).getDefIndex();
+  if (li_->hasInterval(DstReg)) {
+    const LiveRange *Range =
+      li_->getInterval(DstReg).getLiveRangeContaining(CopyDef);
+    if (Range && Range->valno->def == CopyDef)
+      Range->valno->setCopy(0);
+  }
+  if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+    return;
+  for (const unsigned *Alias = tri_->getAliasSet(DstReg); *Alias; ++Alias) {
+    const LiveRange *Range = !li_->hasInterval(*Alias) ? 0 :
+      li_->getInterval(*Alias).getLiveRangeContaining(CopyDef);
+    if (Range && Range->valno->def == CopyDef)
+      Range->valno->setCopy(0);
+  }
+}
+
+/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
+/// Such a join cannot be undone and the result is not spillable, so joins
+/// that risk tying down a physreg over a long interval are refused.
+bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
+  const unsigned PhysReg = CP.getDstReg();
+  const bool Allocatable = li_->isAllocatable(PhysReg);
+  LiveInterval &VirtInt = li_->getInterval(CP.getSrcReg());
+
+  // A single-valued interval defined by a copy from a reserved register is
+  // always joined: that never increases register pressure.
+  if (!Allocatable && CP.isFlipped() && VirtInt.containsOneValue())
+    return true;
+
+  if (!EnablePhysicalJoin) {
+    DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+    return false;
+  }
+
+  // Apart from the case above, only coalesce into allocatable physregs; we
+  // don't want to risk modifying reserved registers.
+  if (!Allocatable) {
+    DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+    return false;  // Not coalescable.
+  }
+
+  // Don't join with physregs that have a ridiculous number of live ranges;
+  // the data structure performance is really bad when that happens.
+  const bool TooManyRanges = li_->hasInterval(PhysReg) &&
+                             li_->getInterval(PhysReg).ranges.size() > 1000;
+  if (TooManyRanges) {
+    ++numAborts;
+    DEBUG(dbgs()
+          << "\tPhysical register live interval too complicated, abort!\n");
+    return false;
+  }
+
+  // FIXME: Why are we skipping this test for partial copies?
+  //        CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+  if (CP.isPartial())
+    return true;
+
+  // Abort when the virtual interval spans (approximately) more instructions
+  // than twice the number of allocatable registers in its class.
+  const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+  const unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
+  if (li_->getApproximateInstructionCount(VirtInt) > Threshold) {
+    ++numAborts;
+    DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+    return false;
+  }
+  return true;
+}
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes. SrcRC and DstRC
+/// are the classes of SrcReg and DstReg, NewRC is the class to be used for
+/// the joined register.
+bool RegisterCoalescer::isWinToJoinCrossClass(
+    unsigned SrcReg, unsigned DstReg, const TargetRegisterClass *SrcRC,
+    const TargetRegisterClass *DstRC, const TargetRegisterClass *NewRC) {
+  unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
+  // This heuristic is good enough in practice, but it's obviously not *right*.
+  // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+  // out all but the most restrictive register classes.
+  if (NewRCCount > 4 ||
+      // Early exit if the function is fairly small, coalesce aggressively if
+      // that's the case. For really special register classes with 4 or fewer
+      // registers, be a bit more careful. The test is written as a product so
+      // that a class without allocatable registers cannot divide by zero.
+      li_->getFuncInstructionCount() < 8 * NewRCCount)
+    return true;
+  LiveInterval &SrcInt = li_->getInterval(SrcReg);
+  LiveInterval &DstInt = li_->getInterval(DstReg);
+  unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+  unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+
+  // Coalesce aggressively if the intervals are small compared to the number of
+  // registers in the new class. The number 4 is fairly arbitrary, chosen to be
+  // less aggressive than the 8 used for the whole function size.
+  const unsigned ThresSize = 4 * NewRCCount;
+  if (SrcSize <= ThresSize && DstSize <= ThresSize)
+    return true;
+
+  // Estimate *register use density*. If it doubles or more, abort.
+  unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+                                   mri_->use_nodbg_end());
+  unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+                                   mri_->use_nodbg_end());
+  unsigned NewUses = SrcUses + DstUses;
+  unsigned NewSize = SrcSize + DstSize;
+  if (SrcRC != NewRC && SrcSize > ThresSize) {
+    unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
+    if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+      return false;
+  }
+  if (DstRC != NewRC && DstSize > ThresSize) {
+    unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
+    if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
+      return false;
+  }
+  return true;
+}
+
+
+/// JoinCopy - Attempt to join the intervals of the src/dst registers of the
+/// copy instruction CopyMI.  Returns true if the copy was coalesced away or
+/// otherwise eliminated (rematerialized or removed as trivial).  If joining
+/// is not possible now, but may become possible once other things have been
+/// coalesced, it returns false and sets 'Again' to true.
+bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+
+  Again = false;
+  if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+    return false; // Already done.
+
+  DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+  CoalescerPair CP(*tii_, *tri_);
+  if (!CP.setRegisters(CopyMI)) {
+    DEBUG(dbgs() << "\tNot coalescable.\n");
+    return false;
+  }
+
+  // If both sides are already the same register, there is nothing to join.
+  if (CP.getSrcReg() == CP.getDstReg()) {
+    markAsJoined(CopyMI);
+    DEBUG(dbgs() << "\tCopy already coalesced.\n");
+    return false;  // Not coalescable.
+  }
+
+  DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
+               << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+               << "\n");
+
+  // Enforce policies.
+  if (CP.isPhys()) {
+    if (!shouldJoinPhys(CP)) {
+      // Before giving up coalescing, if definition of source is defined by
+      // trivial computation, try rematerializing it.
+      if (!CP.isFlipped() &&
+          ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+                                  CP.getDstReg(), 0, CopyMI))
+        return true;
+      return false;
+    }
+  } else {
+    // Avoid constraining virtual register regclass too much.
+    if (CP.isCrossClass()) {
+      DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
+      if (DisableCrossClassJoin) {
+        DEBUG(dbgs() << "\tCross-class joins disabled.\n");
+        return false;
+      }
+      if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+                                 mri_->getRegClass(CP.getSrcReg()),
+                                 mri_->getRegClass(CP.getDstReg()),
+                                 CP.getNewRC())) {
+        DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
+        Again = true;  // May be possible to coalesce later.
+        return false;
+      }
+    }
+
+    // When possible, let DstReg be the larger interval.
+    if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
+                           li_->getInterval(CP.getDstReg()).ranges.size())
+      CP.flip();
+  }
+
+  // Okay, attempt to join these two intervals.  On failure, this returns false.
+  // Otherwise, if one of the intervals being joined is a physreg, this method
+  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
+  // been modified, so we can use this information below to update aliases.
+  if (!JoinIntervals(CP)) {
+    // Coalescing failed.
+
+    // If definition of source is defined by trivial computation, try
+    // rematerializing it.
+    if (!CP.isFlipped() &&
+        ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+                                CP.getDstReg(), 0, CopyMI))
+      return true;
+
+    // If we can eliminate the copy without merging the live ranges, do so now.
+    if (!CP.isPartial()) {
+      if (AdjustCopiesBackFrom(CP, CopyMI) ||
+          RemoveCopyByCommutingDef(CP, CopyMI)) {
+        markAsJoined(CopyMI);
+        DEBUG(dbgs() << "\tTrivial!\n");
+        return true;
+      }
+    }
+
+    // Otherwise, we are unable to join the intervals.
+    DEBUG(dbgs() << "\tInterference!\n");
+    Again = true;  // May be possible to coalesce later.
+    return false;
+  }
+
+  // Coalescing to a virtual register that is of a sub-register class of the
+  // other. Make sure the resulting register is set to the right register class.
+  if (CP.isCrossClass()) {
+    ++numCrossRCs;
+    mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+  }
+
+  // Remember to delete the copy instruction.
+  markAsJoined(CopyMI);
+
+  UpdateRegDefsUses(CP);
+
+  // If we have extended the live range of a physical register, make sure we
+  // update live-in lists as well.
+  if (CP.isPhys()) {
+    SmallVector<MachineBasicBlock*, 16> BlockSeq;
+    // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+    // ranges for this, and they are preserved.
+    LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+    for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+         I != E; ++I ) {
+      li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+      for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+        MachineBasicBlock &block = *BlockSeq[idx];
+        if (!block.isLiveIn(CP.getDstReg()))
+          block.addLiveIn(CP.getDstReg());
+      }
+      BlockSeq.clear();
+    }
+  }
+
+  // SrcReg is guaranteed to be the register whose live interval was merged
+  // into the other one, so its interval is no longer needed.
+  li_->removeInterval(CP.getSrcReg());
+
+  // Update regalloc hint.
+  tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+
+  DEBUG({
+    LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+    dbgs() << "\tJoined. Result = ";
+    DstInt.print(dbgs(), tri_);
+    dbgs() << "\n";
+  });
+
+  ++numJoins;
+  return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be.  This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VNI is the value that we're trying to resolve.  NewVNInfo accumulates the
+/// values of the result LiveInterval; the index of a value in NewVNInfo is
+/// its ultimate value number.  ThisFromOther/OtherFromThis are maps that keep
+/// track of whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of the value #'s
+/// assigned so far, indexed by VNInfo id (-1: not yet, -2: being computed).
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+                                  SmallVector<VNInfo*, 16> &NewVNInfo,
+                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+                                  SmallVector<int, 16> &ThisValNoAssignments,
+                                  SmallVector<int, 16> &OtherValNoAssignments) {
+  unsigned VN = VNI->id;
+
+  // If the VN has already been computed, just return it.
+  if (ThisValNoAssignments[VN] >= 0)
+    return ThisValNoAssignments[VN];
+  assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+  // If this val is not a copy from the other val, then it must be a new value
+  // number in the destination.
+  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+  if (I == ThisFromOther.end()) {
+    NewVNInfo.push_back(VNI);
+    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+  }
+  VNInfo *OtherValNo = I->second;
+
+  // Otherwise, this *is* a copy from the RHS.  If the other side has already
+  // been computed, return it.
+  if (OtherValNoAssignments[OtherValNo->id] >= 0)
+    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+  // Mark this value number as currently being computed, then ask what the
+  // ultimate value # of the other value is (with the two sides swapped).
+  ThisValNoAssignments[VN] = -2;
+  unsigned UltimateVN =
+    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+                      OtherValNoAssignments, ThisValNoAssignments);
+  return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+
+// RegistersDefinedFromSameValue - Recognize a pair of copies of the form
+//   A = X
+//   B = X
+// which can be treated as if it were
+//   A = X
+//   B = A
+// so that A and B can be coalesced.
+// VNI is the definition of B. LR is the live range of A that includes
+// the slot just before B. If we return true, we add "B = X" to DupCopies.
+static bool RegistersDefinedFromSameValue(LiveIntervals &li,
+                                          const TargetRegisterInfo &tri,
+                                          CoalescerPair &CP,
+                                          VNInfo *VNI,
+                                          LiveRange *LR,
+                                     SmallVector<MachineInstr*, 8> &DupCopies) {
+  // FIXME: This is very conservative. For example, we don't handle
+  // physical registers.
+
+  MachineInstr *CopyOfB = VNI->getCopy();
+  if (!CopyOfB->isFullCopy() || CP.isPartial() || CP.isPhys())
+    return false;
+
+  const unsigned Dst = CopyOfB->getOperand(0).getReg();
+  const unsigned Src = CopyOfB->getOperand(1).getReg();
+
+  if (!(TargetRegisterInfo::isVirtualRegister(Src) &&
+        TargetRegisterInfo::isVirtualRegister(Dst)))
+    return false;
+
+  // Name the two sides of the pair so that DefReg is the one the copy writes.
+  unsigned DefReg = CP.getDstReg();
+  unsigned OtherReg = CP.getSrcReg();
+  if (OtherReg == Dst)
+    std::swap(DefReg, OtherReg);
+  assert(Dst == DefReg);
+
+  // The value in LR must come from a full virtual-to-virtual copy as well.
+  VNInfo *OtherVNI = LR->valno;
+  if (!OtherVNI->isDefByCopy())
+    return false;
+  const MachineInstr *CopyOfA = OtherVNI->getCopy();
+  if (!CopyOfA->isFullCopy())
+    return false;
+
+  const unsigned OtherDst = CopyOfA->getOperand(0).getReg();
+  const unsigned OtherSrc = CopyOfA->getOperand(1).getReg();
+
+  if (!(TargetRegisterInfo::isVirtualRegister(OtherSrc) &&
+        TargetRegisterInfo::isVirtualRegister(OtherDst)))
+    return false;
+  assert(OtherDst == OtherReg);
+
+  // Both copies have to read the same register X ...
+  if (Src != OtherSrc)
+    return false;
+
+  // ... and the same value number of X; if the copies use two different
+  // value numbers of X, we cannot merge A and B.
+  LiveInterval &SrcInt = li.getInterval(Src);
+  if (SrcInt.getVNInfoAt(OtherVNI->def) != SrcInt.getVNInfoAt(VNI->def))
+    return false;
+
+  // Remember "B = X" for the caller.
+  DupCopies.push_back(CopyOfB);
+
+  return true;
+}
+
+/// JoinIntervals - Attempt to join the live intervals of CP's source (RHS) and
+/// destination (LHS) registers into LHS.  On failure, this returns false.
+bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
+  LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+  DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+  // If a live interval is a physical register, check for interference with any
+  // aliases. The interference check implemented here is a bit more conservative
+  // than the full interference check below. We allow overlapping live ranges
+  // only when one is a copy of the other.
+  if (CP.isPhys()) {
+    for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+      if (!li_->hasInterval(*AS))
+        continue;
+      const LiveInterval &LHS = li_->getInterval(*AS);
+      LiveInterval::const_iterator LI = LHS.begin();
+      for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+           RI != RE; ++RI) {
+        LI = std::lower_bound(LI, LHS.end(), RI->start);
+        // Does LHS have an overlapping live range starting before RI?
+        if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+            (RI->start != RI->valno->def ||
+             !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+          DEBUG({
+            dbgs() << "\t\tInterference from alias: ";
+            LHS.print(dbgs(), tri_);
+            dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+          });
+          return false;
+        }
+
+        // Check that LHS ranges beginning in this range are copies.
+        for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+          if (LI->start != LI->valno->def ||
+              !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+            DEBUG({
+              dbgs() << "\t\tInterference from alias: ";
+              LHS.print(dbgs(), tri_);
+              dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+            });
+            return false;
+          }
+        }
+      }
+    }
+  }
+
+  // Compute the final value assignment, assuming that the live ranges can be
+  // coalesced.
+  SmallVector<int, 16> LHSValNoAssignments;
+  SmallVector<int, 16> RHSValNoAssignments;
+  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+  SmallVector<VNInfo*, 16> NewVNInfo;
+
+  SmallVector<MachineInstr*, 8> DupCopies;
+
+  LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+  DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+  // Loop over the value numbers of the LHS, seeing if any are defined from
+  // the RHS.
+  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    if (VNI->isUnused() || !VNI->isDefByCopy())  // Src not defined by a copy?
+      continue;
+
+    // Never join with a register that has EarlyClobber redefs.
+    if (VNI->hasRedefByEC())
+      return false;
+
+    // Figure out the value # from the RHS.
+    LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+    // The copy could be to an aliased physreg.
+    if (!lr) continue;
+
+    // DstReg is known to be a register in the LHS interval.  If the src is
+    // from the RHS interval, we can use its value #.
+    MachineInstr *MI = VNI->getCopy();
+    if (!CP.isCoalescable(MI) &&
+        !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+      continue;
+
+    LHSValsDefinedFromRHS[VNI] = lr->valno;
+  }
+
+  // Loop over the value numbers of the RHS, seeing if any are defined from
+  // the LHS.
+  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    if (VNI->isUnused() || !VNI->isDefByCopy())  // Src not defined by a copy?
+      continue;
+
+    // Never join with a register that has EarlyClobber redefs.
+    if (VNI->hasRedefByEC())
+      return false;
+
+    // Figure out the value # from the LHS.
+    LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+    // The copy could be to an aliased physreg.
+    if (!lr) continue;
+
+    // DstReg is known to be a register in the RHS interval.  If the src is
+    // from the LHS interval, we can use its value #.
+    MachineInstr *MI = VNI->getCopy();
+    if (!CP.isCoalescable(MI) &&
+        !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+        continue;
+
+    RHSValsDefinedFromLHS[VNI] = lr->valno;
+  }
+
+  LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+  RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+  NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+      continue;
+    ComputeUltimateVN(VNI, NewVNInfo,
+                      LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+                      LHSValNoAssignments, RHSValNoAssignments);
+  }
+  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+      continue;
+    // If this value number isn't a copy from the LHS, it's a new number.
+    if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+      NewVNInfo.push_back(VNI);
+      RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+      continue;
+    }
+
+    ComputeUltimateVN(VNI, NewVNInfo,
+                      RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+                      RHSValNoAssignments, LHSValNoAssignments);
+  }
+
+  // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+  // interval lists to see if these intervals are coalescable.
+  LiveInterval::const_iterator I = LHS.begin();
+  LiveInterval::const_iterator IE = LHS.end();
+  LiveInterval::const_iterator J = RHS.begin();
+  LiveInterval::const_iterator JE = RHS.end();
+
+  // Skip ahead until the first place of potential sharing.
+  if (I != IE && J != JE) {
+    if (I->start < J->start) {
+      I = std::upper_bound(I, IE, J->start);
+      if (I != LHS.begin()) --I;
+    } else if (J->start < I->start) {
+      J = std::upper_bound(J, JE, I->start);
+      if (J != RHS.begin()) --J;
+    }
+  }
+
+  while (I != IE && J != JE) {
+    // Determine if these two live ranges overlap.
+    bool Overlaps;
+    if (I->start < J->start) {
+      Overlaps = I->end > J->start;
+    } else {
+      Overlaps = J->end > I->start;
+    }
+
+    // If so, check value # info to determine if they are really different.
+    if (Overlaps) {
+      // If the live range overlap will map to the same value number in the
+      // result liverange, we can still coalesce them.  If not, we can't.
+      if (LHSValNoAssignments[I->valno->id] !=
+          RHSValNoAssignments[J->valno->id])
+        return false;
+      // If it's re-defined by an early clobber somewhere in the live range,
+      // then conservatively abort coalescing.
+      if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+        return false;
+    }
+
+    // Advance whichever range ends first.
+    if (I->end < J->end)
+      ++I;
+    else
+      ++J;
+  }
+
+  // Propagate the PHI-kill flag of LHS values defined from the RHS.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned LHSValID = LHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[LHSValID]->setHasPHIKill(true);
+  }
+
+  // Propagate the PHI-kill flag of RHS values defined from the LHS.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[RHSValID]->setHasPHIKill(true);
+  }
+
+  if (LHSValNoAssignments.empty())
+    LHSValNoAssignments.push_back(-1);  // Element 0 is addressed below.
+  if (RHSValNoAssignments.empty())
+    RHSValNoAssignments.push_back(-1);  // Element 0 is addressed below.
+
+  SmallVector<unsigned, 8> SourceRegisters;
+  for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
+         E = DupCopies.end(); I != E; ++I) {
+    MachineInstr *MI = *I;
+
+    // We have pretended that the assignment to B in
+    // A = X
+    // B = X
+    // was actually a copy from A. Now that we decided to coalesce A and B,
+    // transform the code into
+    // A = X
+    // X = X
+    // and mark the X as coalesced to keep the illusion.
+    unsigned Src = MI->getOperand(1).getReg();
+    SourceRegisters.push_back(Src);
+    MI->getOperand(0).substVirtReg(Src, 0, *tri_);
+
+    markAsJoined(MI);
+  }
+
+  // If B = X was the last use of X in a liverange, we have to shrink it now
+  // that B = X is gone.
+  for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
+         E = SourceRegisters.end(); I != E; ++I) {
+    li_->shrinkToUses(&li_->getInterval(*I));
+  }
+
+  // If we get here, we know that we can coalesce the live ranges.  Ask the
+  // intervals to coalesce themselves now.
+  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+           mri_);
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Ordering predicate for (loop depth, block) pairs: deeper
+  // loops first, then blocks with more CFG edges, then lower block numbers.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    // Number of predecessor plus successor edges of MBB.
+    static unsigned numEdges(MachineBasicBlock *MBB) {
+      return MBB->pred_size() + MBB->succ_size();
+    }
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      if (LHS.first != RHS.first)
+        return LHS.first > RHS.first;
+      // Taking well-connected blocks early handles the most difficult
+      // copies first while intervals are short.
+      const unsigned LEdges = numEdges(LHS.second);
+      const unsigned REdges = numEdges(RHS.second);
+      if (LEdges != REdges)
+        return LEdges > REdges;
+      return LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+/// CopyCoalesceInMBB - Try to coalesce every copy-like instruction in MBB.
+/// Copies that could not be joined now but may become joinable later are
+/// appended to TryAgain.
+void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                            std::vector<MachineInstr*> &TryAgain) {
+  DEBUG(dbgs() << MBB->getName() << ":\n");
+
+  // The copies are sorted into three buckets. The enumerator order is the
+  // order in which the buckets are processed: copies whose source has an
+  // empty live interval (implicit defs, insert_subreg <undef>) first, then
+  // copies to / from physical registers, and finally virtual-to-virtual
+  // copies.
+  enum { ImpDefKind, PhysKind, VirtKind, NumKinds };
+  SmallVector<MachineInstr*, 8> Buckets[NumKinds];
+
+  MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+  while (MII != E) {
+    MachineInstr *Inst = MII++;
+
+    // Only copies and subreg_to_reg instructions can be joined; they keep
+    // their source register in operand 1 and operand 2 respectively.
+    unsigned SrcOpNo;
+    if (Inst->isCopy())
+      SrcOpNo = 1;
+    else if (Inst->isSubregToReg())
+      SrcOpNo = 2;
+    else
+      continue;
+
+    const unsigned DstReg = Inst->getOperand(0).getReg();
+    const unsigned SrcReg = Inst->getOperand(SrcOpNo).getReg();
+
+    // Classify the copy.
+    unsigned Kind;
+    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+      Kind = ImpDefKind;
+    else if (TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+             TargetRegisterInfo::isPhysicalRegister(DstReg))
+      Kind = PhysKind;
+    else
+      Kind = VirtKind;
+    Buckets[Kind].push_back(Inst);
+  }
+
+  // Join bucket by bucket, remembering the copies that are worth retrying.
+  for (unsigned Kind = 0; Kind != NumKinds; ++Kind) {
+    const SmallVector<MachineInstr*, 8> &Copies = Buckets[Kind];
+    for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+      MachineInstr *TheCopy = Copies[i];
+      bool Again = false;
+      if (JoinCopy(TheCopy, Again))
+        continue;
+      if (Again)
+        TryAgain.push_back(TheCopy);
+    }
+  }
+}
+
+/// joinIntervals - Coalesce copies in every block (deepest loops first when
+/// the function has loops), then retry deferred copies until nothing changes.
+void RegisterCoalescer::joinIntervals() {
+  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+  std::vector<MachineInstr*> TryAgainList;
+  if (!loopInfo->empty()) {
+    // Join intervals in inner loops before other intervals. Unfortunately we
+    // can't just iterate over the loop hierarchy here because there may be
+    // more MBB's than BB's, so collect (depth, MBB) pairs for sorting.
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    std::vector<DepthMBBPair> Order;
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I) {
+      MachineBasicBlock *MBB = I;
+      Order.push_back(DepthMBBPair(loopInfo->getLoopDepth(MBB), MBB));
+    }
+
+    // Sort by loop depth (see DepthMBBCompare for the tie-breakers).
+    std::sort(Order.begin(), Order.end(), DepthMBBCompare());
+
+    // Finally, join intervals in loop nest order.
+    for (std::vector<DepthMBBPair>::iterator I = Order.begin(),
+           E = Order.end(); I != E; ++I)
+      CopyCoalesceInMBB(I->second, TryAgainList);
+  } else {
+    // No loops in the function: join intervals in function order.
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      CopyCoalesceInMBB(I, TryAgainList);
+  }
+
+  // Joining intervals can allow other intervals to be joined.  Iteratively
+  // retry the deferred copies until a full sweep makes no progress. Entries
+  // that are finished are nulled out in place.
+  bool ProgressMade;
+  do {
+    ProgressMade = false;
+    for (std::vector<MachineInstr*>::iterator I = TryAgainList.begin(),
+           E = TryAgainList.end(); I != E; ++I) {
+      if (!*I)
+        continue;
+      bool Again = false;
+      if (JoinCopy(*I, Again) || !Again) {
+        *I = 0;   // Mark this one as done.
+        ProgressMade = true;
+      }
+    }
+  } while (ProgressMade);
+}
+
+void RegisterCoalescer::releaseMemory() {
+  ReMatDefs.clear();     // Forget remat'ed defs,
+  ReMatCopies.clear();   // remat'ed copies,
+  JoinedCopies.clear();  // and copies already marked as joined.
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &fn.getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  li_ = &getAnalysis<LiveIntervals>();
+  ldv_ = &getAnalysis<LiveDebugVariables>();
+  AA = &getAnalysis<AliasAnalysis>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+               << "********** Function: "
+               << ((Value*)mf_->getFunction())->getName() << '\n');
+
+  if (VerifyCoalescing)
+    mf_->verify(this, "Before register coalescing");
+
+  RegClassInfo.runOnMachineFunction(fn);
+
+  // Join (coalesce) intervals if requested.
+  if (EnableJoining) {
+    joinIntervals();
+    DEBUG({
+        dbgs() << "********** INTERVALS POST JOINING **********\n";
+        for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+             I != E; ++I){
+          I->second->print(dbgs(), tri_);
+          dbgs() << "\n";
+        }
+      });
+  }
+
+  // Perform a final pass over the instructions: delete or neutralize the
+  // coalesced copies, remove dead remat'ed defs and fix up kill flags.
+  SmallVector<unsigned, 4> DeadDefs;
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    MachineBasicBlock* mbb = mbbi;
+    for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+         mii != mie; ) {
+      MachineInstr *MI = mii;
+      if (JoinedCopies.count(MI)) {
+        // Delete all coalesced copies.
+        bool DoDelete = true;
+        assert(MI->isCopyLike() && "Unrecognized copy instruction");
+        unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+        if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+            MI->getNumOperands() > 2)
+          // Do not delete extract_subreg, insert_subreg of physical
+          // registers unless the definition is dead. e.g.
+          // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+          // or else the scavenger may complain. LowerSubregs will
+          // delete them later.
+          DoDelete = false;
+
+        if (MI->allDefsAreDead()) {
+          if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+              li_->hasInterval(SrcReg))
+            li_->shrinkToUses(&li_->getInterval(SrcReg));
+          DoDelete = true;
+        }
+        if (!DoDelete) {
+          // We need the instruction to adjust liveness, so make it a KILL.
+          if (MI->isSubregToReg()) {
+            MI->RemoveOperand(3);
+            MI->RemoveOperand(1);
+          }
+          MI->setDesc(tii_->get(TargetOpcode::KILL));
+          mii = llvm::next(mii);
+        } else {
+          li_->RemoveMachineInstrFromMaps(MI);
+          mii = mbbi->erase(mii);
+          ++numPeep;
+        }
+        continue;
+      }
+
+      // Now check if this is a remat'ed def instruction which is now dead.
+      if (ReMatDefs.count(MI)) {
+        bool isDead = true;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+          const MachineOperand &MO = MI->getOperand(i);
+          if (!MO.isReg())
+            continue;
+          unsigned Reg = MO.getReg();
+          if (!Reg)
+            continue;
+          if (TargetRegisterInfo::isVirtualRegister(Reg))
+            DeadDefs.push_back(Reg);  // For RemoveDeadDef if MI proves dead.
+          if (MO.isDead())
+            continue;
+          if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+              !mri_->use_nodbg_empty(Reg)) {
+            isDead = false;
+            break;
+          }
+        }
+        if (isDead) {
+          while (!DeadDefs.empty()) {
+            unsigned DeadDef = DeadDefs.back();
+            DeadDefs.pop_back();
+            RemoveDeadDef(li_->getInterval(DeadDef), MI);
+          }
+          li_->RemoveMachineInstrFromMaps(mii);
+          mii = mbbi->erase(mii);
+          continue;
+        } else
+          DeadDefs.clear();
+      }
+
+      ++mii;
+
+      // Check for now unnecessary kill flags.
+      if (li_->isNotInMIMap(MI)) continue;
+      SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.isKill()) continue;
+        unsigned reg = MO.getReg();
+        if (!reg || !li_->hasInterval(reg)) continue;
+        if (!li_->getInterval(reg).killedAt(DefIdx)) {
+          MO.setIsKill(false);
+          continue;
+        }
+        // When leaving a kill flag on a physreg, check if any subregs should
+        // remain alive.
+        if (!TargetRegisterInfo::isPhysicalRegister(reg))
+          continue;
+        for (const unsigned *SR = tri_->getSubRegisters(reg);
+             unsigned S = *SR; ++SR)
+          if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+            MI->addRegisterDefined(S, tri_);
+      }
+    }
+  }
+
+  DEBUG(dump());
+  DEBUG(ldv_->dump());
+  if (VerifyCoalescing)
+    mf_->verify(this, "After register coalescing");
+  return true;
+}
+
+/// print - Print the LiveIntervals analysis to O; implements the dump method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+   li_->print(O, m);
+}
+
+RegisterCoalescer *llvm::createRegisterCoalescer() {
+  return new RegisterCoalescer();  // Allocated with new; not freed here.
+}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/RegisterCoalescer.h
index 92f6c6474c63..4131d91c00e9 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,37 +7,38 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a simple register copy coalescing phase.
+// This file contains the abstract interface for register coalescers, 
+// allowing them to interact with and query register allocators.
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
-#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
 #include "RegisterClassInfo.h"
+#include "llvm/Support/IncludeFile.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
 
 namespace llvm {
-  class SimpleRegisterCoalescing;
-  class LiveDebugVariables;
+
+  class MachineFunction;
+  class RegallocQuery;
+  class AnalysisUsage;
+  class MachineInstr;
   class TargetRegisterInfo;
+  class TargetRegisterClass;
   class TargetInstrInfo;
+  class LiveDebugVariables;
   class VirtRegMap;
   class MachineLoopInfo;
 
-  /// CopyRec - Representation for copy instructions in coalescer queue.
-  ///
-  struct CopyRec {
-    MachineInstr *MI;
-    unsigned LoopDepth;
-    CopyRec(MachineInstr *mi, unsigned depth)
-      : MI(mi), LoopDepth(depth) {}
-  };
+  class CoalescerPair;
 
-  class SimpleRegisterCoalescing : public MachineFunctionPass,
-                                   public RegisterCoalescer {
+  /// RegisterCoalescer - The register coalescing pass.  This is a concrete
+  /// MachineFunctionPass (see createRegisterCoalescer()) that joins the live
+  /// intervals connected by copy instructions.
+  class RegisterCoalescer : public MachineFunctionPass {
     MachineFunction* mf_;
     MachineRegisterInfo* mri_;
     const TargetMachine* tm_;
@@ -61,41 +62,20 @@ namespace llvm {
     /// been remat'ed.
     SmallPtrSet<MachineInstr*, 8> ReMatDefs;
 
-  public:
-    static char ID; // Pass identifcation, replacement for typeid
-    SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
-      initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual void releaseMemory();
-
-    /// runOnMachineFunction - pass entry point
-    virtual bool runOnMachineFunction(MachineFunction&);
-
-    bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
-      // This runs as an independent pass, so don't do anything.
-      return false;
-    }
-
-    /// print - Implement the dump method.
-    virtual void print(raw_ostream &O, const Module* = 0) const;
-
-  private:
     /// joinIntervals - join compatible live intervals
     void joinIntervals();
 
     /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
     /// copies that cannot yet be coalesced into the "TryAgain" list.
     void CopyCoalesceInMBB(MachineBasicBlock *MBB,
-                           std::vector<CopyRec> &TryAgain);
+                           std::vector<MachineInstr*> &TryAgain);
 
     /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
     /// which are the src/dst of the copy instruction CopyMI.  This returns true
     /// if the copy was successfully coalesced away. If it is not currently
     /// possible to coalesce this interval, but it may be possible if other
     /// things get coalesced, then it returns true by reference in 'Again'.
-    bool JoinCopy(CopyRec &TheCopy, bool &Again);
+    bool JoinCopy(MachineInstr *TheCopy, bool &Again);
 
     /// JoinIntervals - Attempt to join these two intervals.  On failure, this
     /// returns false.  The output "SrcInt" will not have been modified, so we can
@@ -155,8 +135,109 @@ namespace llvm {
 
     /// markAsJoined - Remember that CopyMI has already been joined.
     void markAsJoined(MachineInstr *CopyMI);
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    RegisterCoalescer() : MachineFunctionPass(ID) {
+      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// Register allocators must call this from their own
+    /// getAnalysisUsage to cover the case where the coalescer is not
+    /// a Pass in the proper sense and isn't managed by PassManager.
+    /// PassManager needs to know which analyses to make available and
+    /// which to invalidate when running the register allocator or any
+    /// pass that might call coalescing.  The long-term solution is to
+    /// allow hierarchies of PassManagers.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* = 0) const;
   };
 
+  /// CoalescerPair - A helper class for register coalescers. When deciding if
+  /// two registers can be coalesced, CoalescerPair can determine if a copy
+  /// instruction would become an identity copy after coalescing.
+  class CoalescerPair {
+    const TargetInstrInfo &tii_;
+    const TargetRegisterInfo &tri_;
+
+    /// dstReg_ - The register that will be left after coalescing. It can be a
+    /// virtual or physical register.
+    unsigned dstReg_;
+
+    /// srcReg_ - the virtual register that will be coalesced into dstReg.
+    unsigned srcReg_;
+
+    /// subIdx_ - The subregister index of srcReg in dstReg_. It is possible to
+    /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+    /// virtual register.
+    unsigned subIdx_;
+
+    /// partial_ - True when the original copy was a partial subregister copy.
+    bool partial_;
+
+    /// crossClass_ - True when both regs are virtual, and newRC is constrained.
+    bool crossClass_;
+
+    /// flipped_ - True when DstReg and SrcReg are reversed from the original copy
+    /// instruction.
+    bool flipped_;
+
+    /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+    /// is a physreg.
+    const TargetRegisterClass *newRC_;
+
+  public:
+    CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+      : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+        partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+    /// setRegisters - set registers to match the copy instruction MI. Return
+    /// false if MI is not a coalescable copy instruction.
+    bool setRegisters(const MachineInstr*);
+
+    /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+    /// because dstReg_ is a physical register, or subIdx_ is set.
+    bool flip();
+
+    /// isCoalescable - Return true if MI is a copy instruction that will become
+    /// an identity copy after coalescing.
+    bool isCoalescable(const MachineInstr*) const;
+
+    /// isPhys - Return true if DstReg is a physical register.
+    bool isPhys() const { return !newRC_; }
+
+    /// isPartial - Return true if the original copy instruction did not copy the
+    /// full register, but was a subreg operation.
+    bool isPartial() const { return partial_; }
+
+    /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller register class than DstReg's.
+    bool isCrossClass() const { return crossClass_; }
+
+    /// isFlipped - Return true when getSrcReg is the register being defined by
+    /// the original copy instruction.
+    bool isFlipped() const { return flipped_; }
+
+    /// getDstReg - Return the register (virtual or physical) that will remain
+    /// after coalescing.
+    unsigned getDstReg() const { return dstReg_; }
+
+    /// getSrcReg - Return the virtual register that will be coalesced away.
+    unsigned getSrcReg() const { return srcReg_; }
+
+    /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+    /// coalesced into, or 0.
+    unsigned getSubIdx() const { return subIdx_; }
+
+    /// getNewRC - Return the register class of the coalesced register.
+    const TargetRegisterClass *getNewRC() const { return newRC_; }
+  };
 } // End llvm namespace
 
 #endif
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index c8de3823553c..8b02ec44273a 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -434,8 +434,7 @@ namespace llvm {
            rcEnd = tri->regclass_end();
          rcItr != rcEnd; ++rcItr) {
       const TargetRegisterClass *trc = *rcItr;
-      unsigned capacity = std::distance(trc->allocation_order_begin(*mf),
-                                        trc->allocation_order_end(*mf));
+      unsigned capacity = trc->getRawAllocationOrder(*mf).size();
 
       if (capacity != 0)
         capacityMap[trc] = capacity;
@@ -482,8 +481,7 @@ namespace llvm {
                rcItr != rcEnd; ++rcItr) {
             const TargetRegisterClass *trc = *rcItr;
 
-            if (trc->allocation_order_begin(*mf) ==
-                trc->allocation_order_end(*mf))
+            if (trc->getRawAllocationOrder(*mf).empty())
               continue;
 
             unsigned worstAtI = getWorst(li->reg, trc);
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1302395f423e..21375b286c99 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -19,23 +19,33 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <climits>
 using namespace llvm;
 
+#ifndef NDEBUG
+cl::opt<bool> StressSchedOpt(
+  "stress-sched", cl::Hidden, cl::init(false),
+  cl::desc("Stress test instruction scheduling"));
+#endif
+
 ScheduleDAG::ScheduleDAG(MachineFunction &mf)
   : TM(mf.getTarget()),
     TII(TM.getInstrInfo()),
     TRI(TM.getRegisterInfo()),
     MF(mf), MRI(mf.getRegInfo()),
     EntrySU(), ExitSU() {
+#ifndef NDEBUG
+  StressSched = StressSchedOpt;
+#endif
 }
 
 ScheduleDAG::~ScheduleDAG() {}
 
 /// getInstrDesc helper to handle SDNodes.
-const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
   if (!Node || !Node->isMachineOpcode()) return NULL;
   return &TII->get(Node->getMachineOpcode());
 }
@@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
       if (I->isArtificial())
         dbgs() << " *";
       dbgs() << ": Latency=" << I->getLatency();
+      if (I->isAssignedRegDep())
+        dbgs() << " Reg=" << G->TRI->getName(I->getReg());
       dbgs() << "\n";
     }
   }
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 6b7a8c6491bd..f8b1bc76eb8b 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -45,6 +45,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
       unsigned Reg = 0;
       for (SUnit::const_succ_iterator II = SU->Succs.begin(),
              EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->isCtrl()) continue;  // ignore chain preds
         if (II->getReg()) {
           Reg = II->getReg();
           break;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 2363df429e36..446adfc2b626 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,10 +21,11 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
@@ -205,7 +206,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   bool UnitLatencies = ForceUnitLatencies();
 
   // Ask the target if address-backscheduling is desirable, and if so how much.
-  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
   unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
 
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
@@ -236,13 +237,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       continue;
     }
 
-    const TargetInstrDesc &TID = MI->getDesc();
-    assert(!TID.isTerminator() && !MI->isLabel() &&
+    const MCInstrDesc &MCID = MI->getDesc();
+    assert(!MCID.isTerminator() && !MI->isLabel() &&
            "Cannot schedule terminators or labels!");
     // Create the SUnit for this MI.
     SUnit *SU = NewSUnit(MI);
-    SU->isCall = TID.isCall();
-    SU->isCommutable = TID.isCommutable();
+    SU->isCall = MCID.isCall();
+    SU->isCommutable = MCID.isCommutable();
 
     // Assign the Latency field of SU using target-provided information.
     if (UnitLatencies)
@@ -309,13 +310,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
           if (SpecialAddressLatency != 0 && !UnitLatencies &&
               UseSU != &ExitSU) {
             MachineInstr *UseMI = UseSU->getInstr();
-            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            const MCInstrDesc &UseMCID = UseMI->getDesc();
             int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
             assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
             if (RegUseIndex >= 0 &&
-                (UseTID.mayLoad() || UseTID.mayStore()) &&
-                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
-                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+                (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+                (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+                UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
               LDataLatency += SpecialAddressLatency;
           }
           // Adjust the dependence latency using operand def/use
@@ -352,17 +353,17 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
             unsigned Count = I->second.second;
             const MachineInstr *UseMI = UseMO->getParent();
             unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
-            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            const MCInstrDesc &UseMCID = UseMI->getDesc();
             // TODO: If we knew the total depth of the region here, we could
             // handle the case where the whole loop is inside the region but
             // is large enough that the isScheduleHigh trick isn't needed.
-            if (UseMOIdx < UseTID.getNumOperands()) {
+            if (UseMOIdx < UseMCID.getNumOperands()) {
               // Currently, we only support scheduling regions consisting of
               // single basic blocks. Check to see if the instruction is in
               // the same region by checking to see if it has the same parent.
               if (UseMI->getParent() != MI->getParent()) {
                 unsigned Latency = SU->Latency;
-                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+                if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
                   Latency += SpecialAddressLatency;
                 // This is a wild guess as to the portion of the latency which
                 // will be overlapped by work done outside the current
@@ -374,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
                                     /*isMustAlias=*/false,
                                     /*isArtificial=*/true));
               } else if (SpecialAddressLatency > 0 &&
-                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+                         UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
                 // The entire loop body is within the current scheduling region
                 // and the latency of this operation is assumed to be greater
                 // than the latency of the loop.
@@ -417,9 +418,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
     // produce more precise dependence information.
 #define STORE_LOAD_LATENCY 1
     unsigned TrueMemOrderLatency = 0;
-    if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
+    if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasVolatileMemoryRef() &&
-         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+         (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
       // Be conservative with these and add dependencies on all memory
       // references, even those that are known to not alias.
       for (std::map<const Value *, SUnit *>::iterator I =
@@ -458,7 +459,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       PendingLoads.clear();
       AliasMemDefs.clear();
       AliasMemUses.clear();
-    } else if (TID.mayStore()) {
+    } else if (MCID.mayStore()) {
       bool MayAlias = true;
       TrueMemOrderLatency = STORE_LOAD_LATENCY;
       if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -514,7 +515,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
-    } else if (TID.mayLoad()) {
+    } else if (MCID.mayLoad()) {
       bool MayAlias = true;
       TrueMemOrderLatency = 0;
       if (MI->isInvariantLoad(AA)) {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index e6d7ded8a784..0e005d35189d 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -16,11 +16,11 @@
 #define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
 
 using namespace llvm;
 
@@ -115,12 +115,12 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   // Use the itinerary for the underlying instruction to check for
   // free FU's in the scoreboard at the appropriate future cycles.
 
-  const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
-  if (TID == NULL) {
+  const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+  if (MCID == NULL) {
     // Don't check hazards for non-machineinstr Nodes.
     return NoHazard;
   }
-  unsigned idx = TID->getSchedClass();
+  unsigned idx = MCID->getSchedClass();
   for (const InstrStage *IS = ItinData->beginStage(idx),
          *E = ItinData->endStage(idx); IS != E; ++IS) {
     // We must find one of the stage's units free for every cycle the
@@ -173,16 +173,16 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
 
   // Use the itinerary for the underlying instruction to reserve FU's
   // in the scoreboard at the appropriate future cycles.
-  const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
-  assert(TID && "The scheduler must filter non-machineinstrs");
-  if (DAG->TII->isZeroCost(TID->Opcode))
+  const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+  assert(MCID && "The scheduler must filter non-machineinstrs");
+  if (DAG->TII->isZeroCost(MCID->Opcode))
     return;
 
   ++IssueCount;
 
   unsigned cycle = 0;
 
-  unsigned idx = TID->getSchedClass();
+  unsigned idx = MCID->getSchedClass();
   for (const InstrStage *IS = ItinData->beginStage(idx),
          *E = ItinData->endStage(idx); IS != E; ++IS) {
     // We must reserve one of the stage's units for every cycle the
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e843f5fa340f..4f0d2caca22b 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -138,6 +138,10 @@ namespace {
     SDValue PromoteExtend(SDValue Op);
     bool PromoteLoad(SDValue Op);
 
+    void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+                         SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+                         ISD::NodeType ExtType);
+
     /// combine - call the node-specific routine that knows how to fold each
     /// particular type of node. If that doesn't do anything, try the
     /// target-specific DAG combines.
@@ -234,6 +238,9 @@ namespace {
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
+    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+                               bool DemandHighBits = true);
+    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
@@ -994,7 +1001,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
           dbgs() << "\nWith: ";
           RV.getNode()->dump(&DAG);
           dbgs() << '\n');
-    
+
     // Transfer debug value.
     DAG.TransferDbgValues(SDValue(N, 0), RV);
     WorkListRemover DeadNodes(*this);
@@ -1303,16 +1310,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
   return SDValue();
 }
 
-/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
-/// return 0 or 1 depending on the carry flag.
-static bool isCarryMaterialization(SDValue V) {
-  if (V.getOpcode() != ISD::ADDE)
-    return false;
-
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
-  return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
-}
-
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -1476,18 +1473,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
   }
 
-  // add (adde 0, 0, glue), X -> adde X, 0, glue
-  if (N0->hasOneUse() && isCarryMaterialization(N0))
-    return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
-                       DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
-                       N0.getOperand(2));
-
-  // add X, (adde 0, 0, glue) -> adde X, 0, glue
-  if (N1->hasOneUse() && isCarryMaterialization(N1))
-    return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
-                       DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
-                       N1.getOperand(2));
-
   return SDValue();
 }
 
@@ -1531,16 +1516,6 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
                                    N->getDebugLoc(), MVT::Glue));
   }
 
-  // addc (adde 0, 0, glue), X -> adde X, 0, glue
-  if (N0->hasOneUse() && isCarryMaterialization(N0))
-    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
-                       DAG.getConstant(0, VT), N0.getOperand(2));
-
-  // addc X, (adde 0, 0, glue) -> adde X, 0, glue
-  if (N1->hasOneUse() && isCarryMaterialization(N1))
-    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
-                       DAG.getConstant(0, VT), N1.getOperand(2));
-
   return SDValue();
 }
 
@@ -1591,6 +1566,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   SDValue N1 = N->getOperand(1);
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
   EVT VT = N0.getValueType();
 
   // fold vector ops
@@ -1622,6 +1599,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   // fold (A+B)-B -> A
   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
     return N0.getOperand(0);
+  // fold C2-(A+C1) -> (C2-C1)-A
+  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+    SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+		       N1.getOperand(0));
+  }
   // fold ((A+(B+or-C))-B) -> A+or-C
   if (N0.getOpcode() == ISD::ADD &&
       (N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2508,6 +2491,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   return SDValue();
 }
 
+/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8), with optional 0xff / 0xff00
+/// masks before or after each shift, as (bswap a) >> (bitwidth - 16), i.e. a
+/// byte swap of the low halfword of a.  N is the OR being combined and N0/N1
+/// are its operands.  The replacement is zero above bit 15; DemandHighBits
+/// says whether the caller needs those bits to match the original expression.
+/// Returns a null SDValue if the pattern does not match.
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+                                        bool DemandHighBits) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+    return SDValue();
+  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
+  bool LookPassAnd0 = false;
+  bool LookPassAnd1 = false;
+  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+      std::swap(N0, N1);
+  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+      std::swap(N0, N1);
+  if (N0.getOpcode() == ISD::AND) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N0.getNode()->hasOneUse() || !C || C->getZExtValue() != 0xFF00)
+      return SDValue();
+    N0 = N0.getOperand(0);
+    LookPassAnd0 = true;
+  }
+
+  if (N1.getOpcode() == ISD::AND) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N1.getNode()->hasOneUse() || !C || C->getZExtValue() != 0xFF)
+      return SDValue();
+    N1 = N1.getOperand(0);
+    LookPassAnd1 = true;
+  }
+
+  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+    std::swap(N0, N1);
+  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+    return SDValue();
+  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
+    return SDValue();
+
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+  if (!N01C || N01C->getZExtValue() != 8 ||
+      !N11C || N11C->getZExtValue() != 8)
+    return SDValue();
+
+  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+  SDValue N00 = N0->getOperand(0);
+  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+    if (!N00.getNode()->hasOneUse() || !C || C->getZExtValue() != 0xFF)
+      return SDValue();
+    N00 = N00.getOperand(0);
+    LookPassAnd0 = true;
+  }
+
+  SDValue N10 = N1->getOperand(0);
+  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+    if (!N10.getNode()->hasOneUse() || !C || C->getZExtValue() != 0xFF00)
+      return SDValue();
+    N10 = N10.getOperand(0);
+    LookPassAnd1 = true;
+  }
+
+  if (N00 != N10)
+    return SDValue();
+
+  // The replacement is zero above the low halfword (the SRL clears it), so
+  // when those bits are demanded the matched expression must be zero there.
+  unsigned OpSizeInBits = VT.getSizeInBits();
+  if (DemandHighBits && OpSizeInBits > 16) {
+    // An unmasked (shl a, 8) would leave byte 1 of a in byte 2 of the result.
+    if (!LookPassAnd0)
+      return SDValue();
+    // An unmasked (srl a, 8) needs all OpSizeInBits - 16 high bits of a zero.
+    if (!LookPassAnd1 &&
+        !DAG.MaskedValueIsZero(
+            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+      return SDValue();
+  }
+
+  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+  if (OpSizeInBits > 16)
+    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+  return Res;
+}
+
+/// isBSwapHWordElement - Return true if N is one of the four single-byte terms
+/// of a 32-bit packed halfword byteswap of some value x, i.e. of
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// in either that mask-then-shift spelling or the shift-then-mask one, e.g.
+/// ((x << 8) & 0xff00).  N must have a single use.  Parts is indexed by the
+/// byte of the result that the term produces (0 = lowest); on success the node
+/// defining x is stored in that slot, and a term whose slot is already taken
+/// is rejected.
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+  if (!N.getNode()->hasOneUse())
+    return false;
+
+  unsigned Opc = N.getOpcode();
+  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+    return false;
+
+  // N and its first operand must be one AND and one shift, in either order.
+  SDValue N0 = N.getOperand(0);
+  unsigned Opc0 = N0.getOpcode();
+  bool MaskIsOuter = Opc == ISD::AND;
+  if (MaskIsOuter ? (Opc0 != ISD::SHL && Opc0 != ISD::SRL) : Opc0 != ISD::AND)
+    return false;
+
+  // The mask is operand 1 of the AND, the shift amount operand 1 of the shift.
+  // (N's own operand 1 is the shift amount, not the mask, when N is a shift.)
+  ConstantSDNode *MaskC =
+    dyn_cast<ConstantSDNode>((MaskIsOuter ? N : N0).getOperand(1));
+  ConstantSDNode *ShAmtC =
+    dyn_cast<ConstantSDNode>((MaskIsOuter ? N0 : N).getOperand(1));
+  if (!MaskC || !ShAmtC || ShAmtC->getZExtValue() != 8)
+    return false;
+
+  unsigned MaskByte;
+  switch (MaskC->getZExtValue()) {
+  default:
+    return false;
+  case 0xFF:       MaskByte = 0; break;
+  case 0xFF00:     MaskByte = 1; break;
+  case 0xFF0000:   MaskByte = 2; break;
+  case 0xFF000000: MaskByte = 3; break;
+  }
+
+  // Work out which byte of the result this term produces.
+  unsigned Num;
+  if (MaskIsOuter) {
+    // (x >> 8) & 0xff,   (x >> 8) & 0xff0000   -> result bytes 0 and 2 (SRL)
+    // (x << 8) & 0xff00, (x << 8) & 0xff000000 -> result bytes 1 and 3 (SHL)
+    if ((Opc0 == ISD::SHL) != (MaskByte % 2 != 0))
+      return false;
+    Num = MaskByte;
+  } else if (Opc == ISD::SHL) {
+    // (x & 0xff) << 8, (x & 0xff0000) << 8: source byte 0/2 -> result 1/3
+    if (MaskByte % 2 != 0)
+      return false;
+    Num = MaskByte + 1;
+  } else { // Opc == ISD::SRL
+    // (x & 0xff00) >> 8, (x & 0xff000000) >> 8: source byte 1/3 -> result 0/2
+    if (MaskByte % 2 == 0)
+      return false;
+    Num = MaskByte - 1;
+  }
+
+  // Callers rebuild x as SDValue(Parts[i], 0), so x must be result 0.
+  SDValue X = N0.getOperand(0);
+  if (X.getResNo() != 0 || Parts[Num])
+    return false;
+  Parts[Num] = X.getNode();
+  return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+/// N is the i32 OR being combined and N0/N1 are its operands.  Returns the
+/// replacement value, or a null SDValue if the pattern does not match.
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+  if (!LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32)
+    return SDValue();
+  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+  // Look for either
+  // (or (or (and), (and)), (or (and), (and)))
+  // (or (or (or (and), (and)), (and)), (and))
+  if (N0.getOpcode() != ISD::OR)
+    return SDValue();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+
+  if (N1.getOpcode() == ISD::OR) {
+    // (or (or (and), (and)), (or (and), (and)))
+    // The four elements are the operands of N0 and of N1 themselves.
+    if (!isBSwapHWordElement(N00, Parts))
+      return SDValue();
+    if (!isBSwapHWordElement(N01, Parts))
+      return SDValue();
+    SDValue N10 = N1.getOperand(0);
+    if (!isBSwapHWordElement(N10, Parts))
+      return SDValue();
+    SDValue N11 = N1.getOperand(1);
+    if (!isBSwapHWordElement(N11, Parts))
+      return SDValue();
+  } else {
+    // (or (or (or (and), (and)), (and)), (and))
+    if (!isBSwapHWordElement(N1, Parts))
+      return SDValue();
+    if (!isBSwapHWordElement(N01, Parts))
+      return SDValue();
+    if (N00.getOpcode() != ISD::OR)
+      return SDValue();
+    SDValue N000 = N00.getOperand(0);
+    if (!isBSwapHWordElement(N000, Parts))
+      return SDValue();
+    SDValue N001 = N00.getOperand(1);
+    if (!isBSwapHWordElement(N001, Parts))
+      return SDValue();
+  }
+
+  // Make sure the parts are all coming from the same node.
+  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+    return SDValue();
+
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+                              SDValue(Parts[0],0));
+
+  // Result of the bswap should be rotated by 16. If it's not legal, then
+  // do  (x << 16) | (x >> 16).
+  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+    return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+  else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+    return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+  return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+                     DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+                     DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
 SDValue DAGCombiner::visitOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2543,6 +2764,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   // fold (or x, c) -> c iff (x & ~c) == 0
   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
     return N1;
+
+  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+  if (BSwap.getNode() != 0)
+    return BSwap;
+  BSwap = MatchBSwapHWordLow(N, N0, N1);
+  if (BSwap.getNode() != 0)
+    return BSwap;
+
   // reassociate or
   SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
   if (ROR.getNode() != 0)
@@ -3030,6 +3260,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // fold (shl x, 0) -> x
   if (N1C && N1C->isNullValue())
     return N0;
+  // fold (shl undef, x) -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
   // if (shl x, c) is known to be zero, return 0
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
                             APInt::getAllOnesValue(OpSizeInBits)))
@@ -3696,6 +3929,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
   return true;
 }
 
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+                                  SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+                                  ISD::NodeType ExtType) {
+  // Rewrite each SetCC in SetCCs so its operands use ExtLoad's value type.
+  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+    SDNode *SetCC = SetCCs[i];
+    SmallVector<SDValue, 4> Ops;
+    // Use ExtLoad in place of Trunc; apply ExtType to any other operand.
+    for (unsigned j = 0; j != 2; ++j) {
+      SDValue SOp = SetCC->getOperand(j);
+      if (SOp == Trunc)
+        Ops.push_back(ExtLoad);
+      else
+        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+    }
+    // Operand 2 is copied unchanged into the replacement SETCC built below.
+    Ops.push_back(SetCC->getOperand(2));
+    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+                                 &Ops[0], Ops.size()));
+  }
+}
+
 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
@@ -3784,27 +4039,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
-      // Extend SetCC uses if necessary.
-      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
-        SDNode *SetCC = SetCCs[i];
-        SmallVector<SDValue, 4> Ops;
-
-        for (unsigned j = 0; j != 2; ++j) {
-          SDValue SOp = SetCC->getOperand(j);
-          if (SOp == Trunc)
-            Ops.push_back(ExtLoad);
-          else
-            Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
-                                      N->getDebugLoc(), VT, SOp));
-        }
-
-        Ops.push_back(SetCC->getOperand(2));
-        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
-                                     SetCC->getValueType(0),
-                                     &Ops[0], Ops.size()));
-      }
-
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                      ISD::SIGN_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
@@ -3832,6 +4068,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     }
   }
 
+  // fold (sext (and/or/xor (load x), cst)) ->
+  //      (and/or/xor (sextload x), (sext cst))
+  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+       N0.getOpcode() == ISD::XOR) &&
+      isa<LoadSDNode>(N0.getOperand(0)) &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+    if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+      bool DoXform = true;
+      SmallVector<SDNode*, 4> SetCCs;
+      if (!N0.hasOneUse())
+        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+                                          SetCCs, TLI);
+      if (DoXform) {
+        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+                                         LN0->getChain(), LN0->getBasePtr(),
+                                         LN0->getPointerInfo(),
+                                         LN0->getMemoryVT(),
+                                         LN0->isVolatile(),
+                                         LN0->isNonTemporal(),
+                                         LN0->getAlignment());
+        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        Mask = Mask.sext(VT.getSizeInBits());
+        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                                  ExtLoad, DAG.getConstant(Mask, VT));
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+                                    N0.getOperand(0).getDebugLoc(),
+                                    N0.getOperand(0).getValueType(), ExtLoad);
+        CombineTo(N, And);
+        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                        ISD::SIGN_EXTEND);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
   if (N0.getOpcode() == ISD::SETCC) {
     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
     // Only do this before legalize for now.
@@ -3990,27 +4265,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
 
-      // Extend SetCC uses if necessary.
-      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
-        SDNode *SetCC = SetCCs[i];
-        SmallVector<SDValue, 4> Ops;
-
-        for (unsigned j = 0; j != 2; ++j) {
-          SDValue SOp = SetCC->getOperand(j);
-          if (SOp == Trunc)
-            Ops.push_back(ExtLoad);
-          else
-            Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
-                                      N->getDebugLoc(), VT, SOp));
-        }
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                      ISD::ZERO_EXTEND);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
 
-        Ops.push_back(SetCC->getOperand(2));
-        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
-                                     SetCC->getValueType(0),
-                                     &Ops[0], Ops.size()));
+  // fold (zext (and/or/xor (load x), cst)) ->
+  //      (and/or/xor (zextload x), (zext cst))
+  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+       N0.getOpcode() == ISD::XOR) &&
+      isa<LoadSDNode>(N0.getOperand(0)) &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+      bool DoXform = true;
+      SmallVector<SDNode*, 4> SetCCs;
+      if (!N0.hasOneUse())
+        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+                                          SetCCs, TLI);
+      if (DoXform) {
+        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+                                         LN0->getChain(), LN0->getBasePtr(),
+                                         LN0->getPointerInfo(),
+                                         LN0->getMemoryVT(),
+                                         LN0->isVolatile(),
+                                         LN0->isNonTemporal(),
+                                         LN0->getAlignment());
+        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        Mask = Mask.zext(VT.getSizeInBits());
+        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                                  ExtLoad, DAG.getConstant(Mask, VT));
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+                                    N0.getOperand(0).getDebugLoc(),
+                                    N0.getOperand(0).getValueType(), ExtLoad);
+        CombineTo(N, And);
+        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                        ISD::ZERO_EXTEND);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
       }
-
-      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
 
@@ -4198,27 +4494,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                   N0.getValueType(), ExtLoad);
       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
-      // Extend SetCC uses if necessary.
-      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
-        SDNode *SetCC = SetCCs[i];
-        SmallVector<SDValue, 4> Ops;
-
-        for (unsigned j = 0; j != 2; ++j) {
-          SDValue SOp = SetCC->getOperand(j);
-          if (SOp == Trunc)
-            Ops.push_back(ExtLoad);
-          else
-            Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
-                                      N->getDebugLoc(), VT, SOp));
-        }
-
-        Ops.push_back(SetCC->getOperand(2));
-        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
-                                     SetCC->getValueType(0),
-                                     &Ops[0], Ops.size()));
-      }
-
+      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+                      ISD::ANY_EXTEND);
       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
     }
   }
@@ -4555,6 +4832,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   }
+
+  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                       N0.getOperand(1), false);
+    if (BSwap.getNode() != 0)
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         BSwap, N1);
+  }
+
   return SDValue();
 }
 
@@ -5180,7 +5467,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   // fold (sint_to_fp c1) -> c1fp
   if (N0C && OpVT != MVT::ppcf128 &&
       // ...but only if the target supports immediate floating-point values
-      (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+      (Level == llvm::Unrestricted ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
 
   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -5204,7 +5492,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   // fold (uint_to_fp c1) -> c1fp
   if (N0C && OpVT != MVT::ppcf128 &&
       // ...but only if the target supports immediate floating-point values
-      (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+      (Level == llvm::Unrestricted ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
     return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
 
   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -5648,12 +5937,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
 
   // Now check for #3 and #4.
   bool RealUse = false;
+
+  // Caches for hasPredecessorHelper
+  SmallPtrSet<const SDNode *, 32> Visited;
+  SmallVector<const SDNode *, 16> Worklist;
+
   for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
          E = Ptr.getNode()->use_end(); I != E; ++I) {
     SDNode *Use = *I;
     if (Use == N)
       continue;
-    if (Use->isPredecessorOf(N))
+    if (N->hasPredecessorHelper(Use, Visited, Worklist))
       return false;
 
     if (!((Use->getOpcode() == ISD::LOAD &&
@@ -6431,8 +6725,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
     SDValue Shorter =
       GetDemandedBits(Value,
-                      APInt::getLowBitsSet(Value.getValueSizeInBits(),
-                                           ST->getMemoryVT().getSizeInBits()));
+                      APInt::getLowBitsSet(
+                        Value.getValueType().getScalarType().getSizeInBits(),
+                        ST->getMemoryVT().getScalarType().getSizeInBits()));
     AddToWorkList(Value.getNode());
     if (Shorter.getNode())
       return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
@@ -7156,7 +7451,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         const TargetData &TD = *TLI.getTargetData();
 
         // Create a ConstantArray of the two constants.
-        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
         SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
                                             TD.getPrefTypeAlignment(FPTy));
         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 797f17444850..54a7d43f46d6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -547,7 +547,7 @@ bool FastISel::SelectCall(const User *I) {
   case Intrinsic::dbg_value: {
     // This form of DBG_VALUE is target-independent.
     const DbgValueInst *DI = cast<DbgValueInst>(Call);
-    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
     const Value *V = DI->getValue();
     if (!V) {
       // Currently the optimizer can produce this; insert an undef to
@@ -556,9 +556,14 @@ bool FastISel::SelectCall(const User *I) {
         .addReg(0U).addImm(DI->getOffset())
         .addMetadata(DI->getVariable());
     } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
-        .addImm(CI->getZExtValue()).addImm(DI->getOffset())
-        .addMetadata(DI->getVariable());
+      if (CI->getBitWidth() > 64)
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+          .addCImm(CI).addImm(DI->getOffset())
+          .addMetadata(DI->getVariable());
+      else 
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+          .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+          .addMetadata(DI->getVariable());
     } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
         .addFPImm(CF).addImm(DI->getOffset())
@@ -847,7 +852,7 @@ FastISel::SelectExtractValue(const User *U) {
     return false; // fast-isel can't handle aggregate constants at the moment
 
   // Get the actual result register, which is an offset from the base register.
-  unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end());
+  unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
 
   SmallVector<EVT, 4> AggValueVTs;
   ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -1085,7 +1090,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
 unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                  const TargetRegisterClass* RC) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
   return ResultReg;
@@ -1095,7 +1100,7 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC,
                                   unsigned Op0, bool Op0IsKill) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1115,7 +1120,7 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                    unsigned Op0, bool Op0IsKill,
                                    unsigned Op1, bool Op1IsKill) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1137,7 +1142,7 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                    unsigned Op1, bool Op1IsKill,
                                    unsigned Op2, bool Op2IsKill) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1160,7 +1165,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                    unsigned Op0, bool Op0IsKill,
                                    uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1181,7 +1186,7 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
                                    unsigned Op0, bool Op0IsKill,
                                    uint64_t Imm1, uint64_t Imm2) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1204,7 +1209,7 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                    unsigned Op0, bool Op0IsKill,
                                    const ConstantFP *FPImm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1226,7 +1231,7 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                     unsigned Op1, bool Op1IsKill,
                                     uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1248,7 +1253,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC,
                                   uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
@@ -1264,7 +1269,7 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC,
                                   uint64_t Imm1, uint64_t Imm2) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index cb49a80b67e6..f0f4743298e7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
   // the CopyToReg'd destination register instead of creating a new vreg.
   bool MatchReg = true;
   const TargetRegisterClass *UseRC = NULL;
+  EVT VT = Node->getValueType(ResNo);
+
+  // Stick to the preferred register classes for legal types.
+  if (TLI->isTypeLegal(VT))
+    UseRC = TLI->getRegClassFor(VT);
+
   if (!IsClone && !IsCloned)
     for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
          UI != E; ++UI) {
@@ -100,10 +106,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
             continue;
           Match = false;
           if (User->isMachineOpcode()) {
-            const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+            const MCInstrDesc &II = TII->get(User->getMachineOpcode());
             const TargetRegisterClass *RC = 0;
             if (i+II.getNumDefs() < II.getNumOperands())
-              RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+              RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
             if (!UseRC)
               UseRC = RC;
             else if (RC) {
@@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
         break;
     }
 
-  EVT VT = Node->getValueType(ResNo);
   const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
   SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
-  
+
   // Figure out the register class to create for the destreg.
   if (VRBase) {
     DstRC = MRI->getRegClass(VRBase);
@@ -173,7 +178,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
 }
 
 void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
-                                       const TargetInstrDesc &II,
+                                       const MCInstrDesc &II,
                                        bool IsClone, bool IsCloned,
                                        DenseMap<SDValue, unsigned> &VRBaseMap) {
   assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
@@ -184,7 +189,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
     // is a vreg in the same register class, use the CopyToReg'd destination
     // register instead of creating a new vreg.
     unsigned VRBase = 0;
-    const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+    const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
     if (II.OpInfo[i].isOptionalDef()) {
       // Optional def must be a physical register.
       unsigned NumResults = CountResults(Node);
@@ -237,7 +242,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
       Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
     // Add an IMPLICIT_DEF instruction before every use.
     unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
-    // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+    // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
     if (!VReg) {
       const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
@@ -260,7 +265,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
 void
 InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
                                  unsigned IIOpNum,
-                                 const TargetInstrDesc *II,
+                                 const MCInstrDesc *II,
                                  DenseMap<SDValue, unsigned> &VRBaseMap,
                                  bool IsDebug, bool IsClone, bool IsCloned) {
   assert(Op.getValueType() != MVT::Other &&
@@ -270,9 +275,9 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
   unsigned VReg = getVR(Op, VRBaseMap);
   assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
 
-  const TargetInstrDesc &TID = MI->getDesc();
-  bool isOptDef = IIOpNum < TID.getNumOperands() &&
-    TID.OpInfo[IIOpNum].isOptionalDef();
+  const MCInstrDesc &MCID = MI->getDesc();
+  bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+    MCID.OpInfo[IIOpNum].isOptionalDef();
 
   // If the instruction requires a register in a different class, create
   // a new virtual register and copy the value into it.
@@ -280,8 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
     const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
     const TargetRegisterClass *DstRC = 0;
     if (IIOpNum < II->getNumOperands())
-      DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
-    assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+      DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+    assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
            "Don't have operand info for this instruction!");
     if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
       unsigned NewVReg = MRI->createVirtualRegister(DstRC);
@@ -307,7 +312,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
     while (Idx > 0 &&
            MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
       --Idx;
-    bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+    bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
     if (isTied)
       isKill = false;
   }
@@ -325,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
 /// assertions only.
 void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
                               unsigned IIOpNum,
-                              const TargetInstrDesc *II,
+                              const MCInstrDesc *II,
                               DenseMap<SDValue, unsigned> &VRBaseMap,
                               bool IsDebug, bool IsClone, bool IsCloned) {
   if (Op.isMachineOpcode()) {
@@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
 void InstrEmitter::EmitRegSequence(SDNode *Node,
                                   DenseMap<SDValue, unsigned> &VRBaseMap,
                                   bool IsClone, bool IsCloned) {
-  const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+  const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
   unsigned NewVReg = MRI->createVirtualRegister(RC);
   MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
                              TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
   unsigned NumOps = Node->getNumOperands();
-  assert((NumOps & 1) == 0 &&
-         "REG_SEQUENCE must have an even number of operands!");
-  const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
-  for (unsigned i = 0; i != NumOps; ++i) {
+  assert((NumOps & 1) == 1 &&
+         "REG_SEQUENCE must have an odd number of operands!");
+  const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+  for (unsigned i = 1; i != NumOps; ++i) {
     SDValue Op = Node->getOperand(i);
-    if (i & 1) {
+    if ((i & 1) == 0) {
       unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
       unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
       const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -591,7 +597,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
     return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
   }
   // Otherwise, we're going to create an instruction here.
-  const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+  const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
   MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
   if (SD->getKind() == SDDbgValue::SDNODE) {
     SDNode *Node = SD->getSDNode();
@@ -610,12 +616,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
   } else if (SD->getKind() == SDDbgValue::CONST) {
     const Value *V = SD->getConst();
     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-      // FIXME: SDDbgValue constants aren't updated with legalization, so it's 
-      // possible to have i128 constants in them at this point. Dwarf writer
-      // does not handle i128 constants at the moment so, as a crude workaround,
-      // just drop the debug info if this happens.
-      if (!CI->getValue().isSignedIntN(64))
-        MIB.addReg(0U);
+      if (CI->getBitWidth() > 64)
+        MIB.addCImm(CI);
       else
         MIB.addImm(CI->getSExtValue());
     } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
@@ -666,7 +668,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
     // We want a unique VR for each IMPLICIT_DEF use.
     return;
   
-  const TargetInstrDesc &II = TII->get(Opc);
+  const MCInstrDesc &II = TII->get(Opc);
   unsigned NumResults = CountResults(Node);
   unsigned NodeOperands = CountOperands(Node);
   bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
@@ -695,9 +697,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
       else {
         // Collect declared implicit uses.
-        const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
-        UsedRegs.append(TID.getImplicitUses(),
-                        TID.getImplicitUses() + TID.getNumImplicitUses());
+        const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+        UsedRegs.append(MCID.getImplicitUses(),
+                        MCID.getImplicitUses() + MCID.getNumImplicitUses());
         // In addition to declared implicit uses, we must also check for
         // direct RegisterSDNode operands.
         for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
@@ -849,6 +851,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
         }
         break;
       case InlineAsm::Kind_RegDefEarlyClobber:
+      case InlineAsm::Kind_Clobber:
         for (; NumVals; --NumVals, ++i) {
           unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
           MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 02c044c3f8f1..19fc0445b166 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -22,7 +22,7 @@
 
 namespace llvm {
 
-class TargetInstrDesc;
+class MCInstrDesc;
 class SDDbgValue;
 
 class InstrEmitter {
@@ -49,7 +49,7 @@ class InstrEmitter {
                                     unsigned ResNo) const;
 
   void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
-                              const TargetInstrDesc &II,
+                              const MCInstrDesc &II,
                               bool IsClone, bool IsCloned,
                               DenseMap<SDValue, unsigned> &VRBaseMap);
 
@@ -63,7 +63,7 @@ class InstrEmitter {
   /// not in the required register class.
   void AddRegisterOperand(MachineInstr *MI, SDValue Op,
                           unsigned IIOpNum,
-                          const TargetInstrDesc *II,
+                          const MCInstrDesc *II,
                           DenseMap<SDValue, unsigned> &VRBaseMap,
                           bool IsDebug, bool IsClone, bool IsCloned);
 
@@ -73,7 +73,7 @@ class InstrEmitter {
   /// assertions only.
   void AddOperand(MachineInstr *MI, SDValue Op,
                   unsigned IIOpNum,
-                  const TargetInstrDesc *II,
+                  const MCInstrDesc *II,
                   DenseMap<SDValue, unsigned> &VRBaseMap,
                   bool IsDebug, bool IsClone, bool IsCloned);
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 62d777ca3314..d06e2bdce065 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -58,17 +58,6 @@ class SelectionDAGLegalize {
   /// against each other, including inserted libcalls.
   SmallVector<SDValue, 8> LastCALLSEQ;
 
-  enum LegalizeAction {
-    Legal,      // The target natively supports this operation.
-    Promote,    // This operation should be executed in a larger type.
-    Expand      // Try to expand this to other ops, otherwise use a libcall.
-  };
-
-  /// ValueTypeActions - This is a bitvector that contains two bits for each
-  /// value type, where the two bits correspond to the LegalizeAction enum.
-  /// This can be queried with "getTypeAction(VT)".
-  TargetLowering::ValueTypeActionImpl ValueTypeActions;
-
   /// LegalizedNodes - For nodes that are of legal width, and that have more
   /// than one use, this map indicates what regularized operand to use.  This
   /// allows us to avoid legalizing the same thing more than once.
@@ -87,25 +76,11 @@ class SelectionDAGLegalize {
 public:
   explicit SelectionDAGLegalize(SelectionDAG &DAG);
 
-  /// getTypeAction - Return how we should legalize values of this type, either
-  /// it is already legal or we need to expand it into multiple registers of
-  /// smaller integer type, or we need to promote it to a larger type.
-  LegalizeAction getTypeAction(EVT VT) const {
-    return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT);
-  }
-
-  /// isTypeLegal - Return true if this type is legal on this target.
-  ///
-  bool isTypeLegal(EVT VT) const {
-    return getTypeAction(VT) == Legal;
-  }
-
   void LegalizeDAG();
 
 private:
-  /// LegalizeOp - We know that the specified value has a legal type.
-  /// Recursively ensure that the operands have legal types, then return the
-  /// result.
+  /// LegalizeOp - Return a legal replacement for the given operation, with
+  /// all legal operands.
   SDValue LegalizeOp(SDValue O);
 
   SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -220,10 +195,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT,  DebugLoc dl,
 
 SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
   : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
-    DAG(dag),
-    ValueTypeActions(TLI.getValueTypeActions()) {
-  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
-         "Too many value types for ValueTypeActions to hold!");
+    DAG(dag) {
 }
 
 void SelectionDAGLegalize::LegalizeDAG() {
@@ -753,7 +725,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
   DebugLoc dl = ST->getDebugLoc();
   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
     if (CFP->getValueType(0) == MVT::f32 &&
-        getTypeAction(MVT::i32) == Legal) {
+        TLI.isTypeLegal(MVT::i32)) {
       Tmp3 = DAG.getConstant(CFP->getValueAPF().
                                       bitcastToAPInt().zextOrTrunc(32),
                               MVT::i32);
@@ -763,14 +735,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
 
     if (CFP->getValueType(0) == MVT::f64) {
       // If this target supports 64-bit registers, do a single 64-bit store.
-      if (getTypeAction(MVT::i64) == Legal) {
+      if (TLI.isTypeLegal(MVT::i64)) {
         Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                   zextOrTrunc(64), MVT::i64);
         return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
                             isVolatile, isNonTemporal, Alignment);
       }
 
-      if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+      if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
         // Otherwise, if the target supports 32-bit registers, use 2 32-bit
         // stores.  If the target supports neither 32- nor 64-bits, this
         // xform is certainly not worth it.
@@ -794,10 +766,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
   return SDValue(0, 0);
 }
 
-/// LegalizeOp - We know that the specified value has a legal type, and
-/// that its operands are legal.  Now ensure that the operation itself
-/// is legal, recursively ensuring that the operands' operations remain
-/// legal.
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
 SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
     return Op;
@@ -806,11 +776,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   DebugLoc dl = Node->getDebugLoc();
 
   for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
-    assert(getTypeAction(Node->getValueType(i)) == Legal &&
+    assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+             TargetLowering::TypeLegal &&
            "Unexpected illegal type!");
 
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
-    assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+    assert((TLI.getTypeAction(*DAG.getContext(),
+                              Node->getOperand(i).getValueType()) ==
+              TargetLowering::TypeLegal ||
             Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
            "Unexpected illegal type!");
 
@@ -1354,7 +1327,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         }
         break;
       case TargetLowering::Expand:
-        if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+        if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
           SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
                                      LD->getPointerInfo(),
                                      LD->isVolatile(), LD->isNonTemporal(),
@@ -1374,6 +1347,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           Tmp2 = LegalizeOp(Load.getValue(1));
           break;
         }
+
+        // If this is a promoted vector load, and the vector element types are
+        // legal, then scalarize it.
+        if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+          TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
+          SmallVector<SDValue, 8> LoadVals;
+          SmallVector<SDValue, 8> LoadChains;
+          unsigned NumElem = SrcVT.getVectorNumElements();
+          unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                DAG.getIntPtrConstant(Stride));
+            SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                  Node->getValueType(0).getScalarType(),
+                  Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                  SrcVT.getScalarType(),
+                  LD->isVolatile(), LD->isNonTemporal(),
+                  LD->getAlignment());
+
+            LoadVals.push_back(ScalarLoad.getValue(0));
+            LoadChains.push_back(ScalarLoad.getValue(1));
+          }
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+            &LoadChains[0], LoadChains.size());
+          SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+            Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(0));  // Relegalize new nodes.
+          break;
+        }
+
+        // If this is a promoted vector load, and the vector element types are
+        // illegal, create the promoted vector from bitcasted segments.
+        if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+          EVT MemElemTy = Node->getValueType(0).getScalarType();
+          EVT SrcSclrTy = SrcVT.getScalarType();
+          unsigned SizeRatio =
+            (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+          SmallVector<SDValue, 8> LoadVals;
+          SmallVector<SDValue, 8> LoadChains;
+          unsigned NumElem = SrcVT.getVectorNumElements();
+          unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                DAG.getIntPtrConstant(Stride));
+            SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                  SrcVT.getScalarType(),
+                  Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                  SrcVT.getScalarType(),
+                  LD->isVolatile(), LD->isNonTemporal(),
+                  LD->getAlignment());
+            if (TLI.isBigEndian()) {
+              // MSB (which is garbage, comes first)
+              LoadVals.push_back(ScalarLoad.getValue(0));
+              for (unsigned i = 0; i<SizeRatio-1; ++i)
+                LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+            } else {
+              // LSB (which is data, comes first)
+              for (unsigned i = 0; i<SizeRatio-1; ++i)
+                LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+              LoadVals.push_back(ScalarLoad.getValue(0));
+            }
+            LoadChains.push_back(ScalarLoad.getValue(1));
+          }
+
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+            &LoadChains[0], LoadChains.size());
+          EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+            SrcVT.getScalarType(), NumElem*SizeRatio);
+          SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, 
+            TempWideVector, &LoadVals[0], LoadVals.size());
+
+          // Cast to the correct type
+          ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(0));  // Relegalize new nodes.
+          break;
+
+        }
+
         // FIXME: This does not work for vectors on most targets.  Sign- and
         // zero-extend operations are currently folded into extending loads,
         // whether they are legal or not, and then we end up here without any
@@ -1548,9 +1606,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         case TargetLowering::Custom:
           Result = TLI.LowerOperation(Result, DAG);
           break;
-        case Expand:
+        case TargetLowering::Expand:
+
+          EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+          EVT NarrowScalarVT = StVT.getScalarType();
+
+          // The Store type is illegal, must scalarize the vector store.
+          SmallVector<SDValue, 8> Stores;
+          bool ScalarLegal = TLI.isTypeLegal(WideScalarVT);
+          if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+            unsigned NumElem = StVT.getVectorNumElements();
+
+            unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+            // Round odd types to the next pow of two.
+            if (!isPowerOf2_32(ScalarSize))
+              ScalarSize = NextPowerOf2(ScalarSize);
+            // Types smaller than 8 bits are promoted to 8 bits.
+            ScalarSize = std::max<unsigned>(ScalarSize, 8);
+            // Store stride
+            unsigned Stride = ScalarSize/8;
+            assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+            for (unsigned Idx=0; Idx<NumElem; Idx++) {
+              SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                       WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+              EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+              Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+              Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                 DAG.getIntPtrConstant(Stride));
+              SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                           ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                           isVolatile, isNonTemporal, Alignment);
+              Stores.push_back(Store);
+            }
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &Stores[0], Stores.size());
+            break;
+          }
+
+          // The Store type is illegal, must scalarize the vector store.
+          // However, the scalar type is illegal. Must bitcast the result
+          // and store it in smaller parts.
+          if (!TLI.isTypeLegal(StVT) && StVT.isVector()) {
+            unsigned WideNumElem = StVT.getVectorNumElements();
+            unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+            unsigned SizeRatio =
+              (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+            EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+                                               SizeRatio*WideNumElem);
+
+            // Cast the wide elem vector to wider vec with smaller elem type.
+            // Example <2 x i64> -> <4 x i32>
+            Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+            for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+              // Extract element Idx
+              SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                       NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+              // bump pointer.
+              Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                 DAG.getIntPtrConstant(Stride));
+
+              // Store only if this element is:
+              //  - First element on big endian, or
+              //  - Last element on little endian
+              if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+                  ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+                SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                             ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                             isVolatile, isNonTemporal, Alignment);
+                Stores.push_back(Store);
+              }
+            }
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &Stores[0], Stores.size());
+            break;
+          }
+
+
           // TRUNCSTORE:i16 i32 -> STORE i16
-          assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+          assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
           Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
           Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
                                 isVolatile, isNonTemporal, Alignment);
@@ -1709,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
   SDValue SignBit;
   EVT FloatVT = Tmp2.getValueType();
   EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
-  if (isTypeLegal(IVT)) {
+  if (TLI.isTypeLegal(IVT)) {
     // Convert to an integer with the same sign bit.
     SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
   } else {
@@ -3031,7 +3171,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
 
     EVT VT = Node->getValueType(0);
     EVT EltVT = VT.getVectorElementType();
-    if (getTypeAction(EltVT) == Promote)
+    if (!TLI.isTypeLegal(EltVT))
       EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
     unsigned NumElems = VT.getVectorNumElements();
     SmallVector<SDValue, 8> Ops;
@@ -3184,6 +3324,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
                                       RTLIB::REM_F80, RTLIB::REM_PPCF128));
     break;
+  case ISD::FMA:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                      RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+    break;
   case ISD::FP16_TO_FP32:
     Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
     break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 27a466b3a928..e6835d87f82c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FLOG:        R = SoftenFloatRes_FLOG(N); break;
     case ISD::FLOG2:       R = SoftenFloatRes_FLOG2(N); break;
     case ISD::FLOG10:      R = SoftenFloatRes_FLOG10(N); break;
+    case ISD::FMA:         R = SoftenFloatRes_FMA(N); break;
     case ISD::FMUL:        R = SoftenFloatRes_FMUL(N); break;
     case ISD::FNEARBYINT:  R = SoftenFloatRes_FNEARBYINT(N); break;
     case ISD::FNEG:        R = SoftenFloatRes_FNEG(N); break;
@@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
                      NVT, &Op, 1, false, N->getDebugLoc());
 }
 
+/// SoftenFloatRes_FMA - Soften an FMA result via the FMA runtime libcall.
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+  EVT OrigVT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);
+  // Soften the three operands in operand order.
+  SDValue Ops[3];
+  for (unsigned i = 0; i != 3; ++i)
+    Ops[i] = GetSoftenedFloat(N->getOperand(i));
+  RTLIB::Libcall LC = GetFPLibCall(OrigVT, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                   RTLIB::FMA_F80, RTLIB::FMA_PPCF128);
+  return MakeLibCall(LC, NVT, Ops, 3, false, N->getDebugLoc());
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
@@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::FLOG:       ExpandFloatRes_FLOG(N, Lo, Hi); break;
   case ISD::FLOG2:      ExpandFloatRes_FLOG2(N, Lo, Hi); break;
   case ISD::FLOG10:     ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+  case ISD::FMA:        ExpandFloatRes_FMA(N, Lo, Hi); break;
   case ISD::FMUL:       ExpandFloatRes_FMUL(N, Lo, Hi); break;
   case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
   case ISD::FNEG:       ExpandFloatRes_FNEG(N, Lo, Hi); break;
@@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
   GetPairElements(Call, Lo, Hi);
 }
 
+/// ExpandFloatRes_FMA - Expand an FMA result: emit the FMA runtime libcall on
+/// the node's operands and hand the call result to GetPairElements for Lo/Hi.
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+                                          SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+  RTLIB::Libcall LC = GetFPLibCall(VT, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                   RTLIB::FMA_F80, RTLIB::FMA_PPCF128);
+  SDValue LibCallRes = MakeLibCall(LC, VT, Ops, 3, false, N->getDebugLoc());
+  // Fill in the caller's Lo and Hi from the libcall result.
+  GetPairElements(LibCallRes, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
   SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8da57f4ffe0..e7c77dd10cb6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -191,10 +192,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
     if (NOutVT.bitsEq(NInVT))
       // The input promotes to the same size.  Convert the promoted value.
       return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
-    if (NInVT.isVector())
-      // Promote vector element via memory load/store.
-      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
-                         CreateStackStoreLoad(InOp, OutVT));
     break;
   case TargetLowering::TypeSoftenFloat:
     // Promote the integer operand by hand.
@@ -204,8 +201,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
     break;
   case TargetLowering::TypeScalarizeVector:
     // Convert the element to an integer and promote it by hand.
-    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
-                       BitConvertToInteger(GetScalarizedVector(InOp)));
+    if (!NOutVT.isVector())
+      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                         BitConvertToInteger(GetScalarizedVector(InOp)));
+    break;
   case TargetLowering::TypeSplitVector: {
     // For example, i32 = BITCAST v2i16 on alpha.  Convert the split
     // pieces of the input into integers and reassemble in the final type.
@@ -339,8 +338,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
   // (eg: because the value being converted is too big), then the result of the
   // original operation was undefined anyway, so the assert is still correct.
   return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
-                     ISD::AssertZext : ISD::AssertSext, dl,
-                     NVT, Res, DAG.getValueType(N->getValueType(0)));
+                     ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+                     DAG.getValueType(N->getValueType(0).getScalarType()));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
@@ -370,7 +369,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
         return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
                            DAG.getValueType(N->getOperand(0).getValueType()));
       if (N->getOpcode() == ISD::ZERO_EXTEND)
-        return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+        return DAG.getZeroExtendInReg(Res, dl,
+                      N->getOperand(0).getValueType().getScalarType());
       assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
       return Res;
     }
@@ -520,20 +520,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Res;
+  SDValue InOp = N->getOperand(0);
+  DebugLoc dl = N->getDebugLoc();
 
-  switch (getTypeAction(N->getOperand(0).getValueType())) {
+  switch (getTypeAction(InOp.getValueType())) {
   default: llvm_unreachable("Unknown type action!");
   case TargetLowering::TypeLegal:
   case TargetLowering::TypeExpandInteger:
-    Res = N->getOperand(0);
+    Res = InOp;
     break;
   case TargetLowering::TypePromoteInteger:
-    Res = GetPromotedInteger(N->getOperand(0));
+    Res = GetPromotedInteger(InOp);
     break;
+  case TargetLowering::TypeSplitVector:
+    EVT InVT = InOp.getValueType();
+    assert(InVT.isVector() && "Cannot split scalar types");
+    unsigned NumElts = InVT.getVectorNumElements();
+    assert(NumElts == NVT.getVectorNumElements() &&
+           "Dst and Src must have the same number of elements");
+    EVT EltVT = InVT.getScalarType();
+    assert(isPowerOf2_32(NumElts) &&
+           "Promoted vector type must be a power of two");
+    // Half-length vector types: input element type and promoted element type.
+    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+    EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+                                   NumElts/2);
+    // Extract the two halves of the input and truncate each one separately.
+    SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+                               DAG.getIntPtrConstant(0));
+    SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+                               DAG.getIntPtrConstant(NumElts/2));
+    EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+    EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+    // Reassemble the truncated halves into a single vector of type NVT.
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
   }
 
   // Truncate to NVT instead of VT
-  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+  return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
@@ -970,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   SDValue Op = GetPromotedInteger(N->getOperand(0));
   Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
-  return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+  return DAG.getZeroExtendInReg(Op, dl,
+                                N->getOperand(0).getValueType().getScalarType());
 }
 
 
@@ -1069,6 +1094,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
   case ISD::UADDO:
   case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+  case ISD::UMULO:
+  case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
   }
 
   // If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2146,6 +2173,86 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
   ReplaceValueWith(SDValue(N, 1), Ofl);
 }
 
+/// ExpandIntRes_XMULO - Expand the integer result of a UMULO/SMULO node into
+/// Lo and Hi and replace the node's overflow result (value 1).
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+  EVT PtrVT = TLI.getPointerTy();
+  const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+  DebugLoc dl = N->getDebugLoc();
+
+  // A divide for UMULO should be faster than a function call.
+  if (N->getOpcode() == ISD::UMULO) {
+    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+    SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+    SplitInteger(MUL, Lo, Hi);
+    // Divide by 1 when RHS is zero so the UDIV never divides by zero.
+    SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+                                  RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+    SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+                                  DAG.getConstant(1, VT), RHS);
+    SDValue DIV = DAG.getNode(ISD::UDIV, dl, LHS.getValueType(), MUL, NotZero);
+    // Overflow iff MUL / RHS != LHS; a zero RHS can never overflow.
+    SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE);
+    Ofl = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+                      DAG.getConstant(0, N->getValueType(1)), Ofl);
+    ReplaceValueWith(SDValue(N, 1), Ofl);
+    return;
+  }
+
+  // Replace this with a libcall that will check overflow.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i32)
+    LC = RTLIB::MULO_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MULO_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MULO_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+  SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+  // Temporary for the overflow value, default it to zero.
+  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+                               DAG.getConstant(0, PtrVT), Temp,
+                               MachinePointerInfo(), false, false, 0);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = N->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = N->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = true;
+    Entry.isZExt = false;
+    Args.push_back(Entry);
+  }
+
+  // Also pass the address of the overflow check.
+  Entry.Node = Temp;
+  Entry.Ty = PtrTy->getPointerTo();
+  Entry.isSExt = true;
+  Entry.isZExt = false;
+  Args.push_back(Entry);
+
+  SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+                    0, TLI.getLibcallCallingConv(LC), false,
+                    true, Func, Args, DAG, dl);
+
+  SplitInteger(CallInfo.first, Lo, Hi);
+  SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+                              MachinePointerInfo(), false, false, 0);
+  SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+                             DAG.getConstant(0, PtrVT),
+                             ISD::SETNE);
+  // Use the overflow from the libcall everywhere.
+  ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
@@ -2638,18 +2745,18 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
   SDValue InOp0 = N->getOperand(0);
   EVT InVT = InOp0.getValueType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
 
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
-  unsigned OutNumElems = N->getValueType(0).getVectorNumElements();
+  unsigned OutNumElems = OutVT.getVectorNumElements();
   EVT NOutVTElem = NOutVT.getVectorElementType();
 
   DebugLoc dl = N->getDebugLoc();
   SDValue BaseIdx = N->getOperand(1);
 
   SmallVector<SDValue, 8> Ops;
+  Ops.reserve(OutNumElems);
   for (unsigned i = 0; i != OutNumElems; ++i) {
 
     // Extract the element from the original vector.
@@ -2681,18 +2788,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
 
   SDValue V0 = GetPromotedInteger(N->getOperand(0));
   SDValue V1 = GetPromotedInteger(N->getOperand(1));
-  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  EVT OutVT = V0.getValueType();
 
-  return DAG.getVectorShuffle(OutVT, dl, V0,V1, &NewMask[0]);
+  return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
 }
 
 
 SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
-
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
@@ -2702,6 +2804,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
 
   SmallVector<SDValue, 8> Ops;
+  Ops.reserve(NumElems);
   for (unsigned i = 0; i != NumElems; ++i) {
     SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
     Ops.push_back(Op);
@@ -2714,10 +2817,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
 
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-  assert(!InVT.isVector() && "Input must not be a scalar");
+  assert(!N->getOperand(0).getValueType().isVector() &&
+         "Input must be a scalar");
 
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -2730,12 +2831,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
-
-  SDValue InOp0 = N->getOperand(0);
-  EVT InVT = InOp0.getValueType();
-  EVT InElVT = InVT.getVectorElementType();
-  EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-
   EVT OutVT = N->getValueType(0);
   EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
@@ -2744,7 +2839,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
 
   DebugLoc dl = N->getDebugLoc();
 
-  SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0);
+  SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                                        N->getOperand(0));
 
   SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
     NOutVTElem, N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b2f966bb7d4c..952797dc75b8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -201,7 +201,7 @@ private:
     EVT OldVT = Op.getValueType();
     DebugLoc dl = Op.getDebugLoc();
     Op = GetPromotedInteger(Op);
-    return DAG.getZeroExtendInReg(Op, dl, OldVT);
+    return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
   }
 
   // Integer Result Promotion.
@@ -318,6 +318,7 @@ private:
 
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_XMULO             (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandShiftByConstant(SDNode *N, unsigned Amt,
                              SDValue &Lo, SDValue &Hi);
@@ -377,6 +378,7 @@ private:
   SDValue SoftenFloatRes_FLOG(SDNode *N);
   SDValue SoftenFloatRes_FLOG2(SDNode *N);
   SDValue SoftenFloatRes_FLOG10(SDNode *N);
+  SDValue SoftenFloatRes_FMA(SDNode *N);
   SDValue SoftenFloatRes_FMUL(SDNode *N);
   SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
   SDValue SoftenFloatRes_FNEG(SDNode *N);
@@ -441,6 +443,7 @@ private:
   void ExpandFloatRes_FLOG      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FLOG2     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FLOG10    (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FMA       (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FMUL      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FNEG      (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5d0f923afb0f..ffff10ce2948 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -182,9 +182,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FRINT:
   case ISD::FNEARBYINT:
   case ISD::FFLOOR:
+  case ISD::SIGN_EXTEND_INREG:
     QueryType = Node->getValueType(0);
     break;
-  case ISD::SIGN_EXTEND_INREG:
   case ISD::FP_ROUND_INREG:
     QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
     break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9595f6947feb..b5698f9c6738 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2164,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
     if (MemVT.getSizeInBits() <= WidenEltWidth)
       break;
     if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+        isPowerOf2_32(WidenWidth / MemVTWidth) &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
       RetVT = MemVT;
@@ -2179,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
     unsigned MemVTWidth = MemVT.getSizeInBits();
     if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
         (WidenWidth % MemVTWidth) == 0 &&
+        isPowerOf2_32(WidenWidth / MemVTWidth) &&
         (MemVTWidth <= Width ||
          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
       if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 7b560d173ed3..b275c6321ae4 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -249,14 +249,14 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     assert(N->getNodeId() == -1 && "Node already inserted!");
     N->setNodeId(NewSU->NodeNum);
       
-    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
-    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
-      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
         NewSU->isTwoAddress = true;
         break;
       }
     }
-    if (TID.isCommutable())
+    if (MCID.isCommutable())
       NewSU->isCommutable = true;
 
     // LoadNode may already exist. This can happen when there is another
@@ -422,10 +422,10 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
 /// FIXME: Move to SelectionDAG?
 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                  const TargetInstrInfo *TII) {
-  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
-  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
-  unsigned NumRes = TID.getNumDefs();
-  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+  unsigned NumRes = MCID.getNumDefs();
+  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
     if (Reg == *ImpDef)
       break;
     ++NumRes;
@@ -490,7 +490,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
 
         ++i; // Skip the ID value.
         if (InlineAsm::isRegDefKind(Flags) ||
-            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
+            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+            InlineAsm::isClobberKind(Flags)) {
           // Check for def of register or earlyclobber register.
           for (; NumVals; --NumVals, ++i) {
             unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -504,10 +505,10 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
     }
     if (!Node->isMachineOpcode())
       continue;
-    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
-    if (!TID.ImplicitDefs)
+    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+    if (!MCID.ImplicitDefs)
       continue;
-    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
       CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
     }
   }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8d61a898f6b3..12b183804c28 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -276,6 +276,43 @@ private:
 };
 }  // end anonymous namespace
 
+/// GetCostForDef - Compute the register class ID (RegClass) and pressure cost
+/// (Cost) of the def at RegDefPos.  Normally both come from TLI's
+/// representative register class for the value type; for MVT::untyped defs
+/// the class is derived from the defining machine opcode and Cost is 1.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+                          const TargetLowering *TLI,
+                          const TargetInstrInfo *TII,
+                          const TargetRegisterInfo *TRI,
+                          unsigned &RegClass, unsigned &Cost) {
+  EVT VT = RegDefPos.GetValue();
+
+  // Special handling for untyped values.  These values can only come from
+  // the expansion of custom DAG-to-DAG patterns.
+  if (VT == MVT::untyped) {
+    const SDNode *Node = RegDefPos.GetNode();
+    unsigned Opcode = Node->getMachineOpcode();
+
+    if (Opcode == TargetOpcode::REG_SEQUENCE) {
+      unsigned DstRCIdx =
+        cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+      RegClass = TRI->getRegClass(DstRCIdx)->getID();
+      Cost = 1;
+      return;
+    }
+
+    unsigned Idx = RegDefPos.GetIdx();
+    const MCInstrDesc &Desc = TII->get(Opcode);
+    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+    RegClass = RC->getID();
+    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+    // better way to determine it.
+    Cost = 1;
+  } else {
+    RegClass = TLI->getRepRegClassFor(VT)->getID();
+    Cost = TLI->getRepRegClassCostFor(VT);
+  }
+}
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
@@ -800,14 +837,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     assert(N->getNodeId() == -1 && "Node already inserted!");
     N->setNodeId(NewSU->NodeNum);
 
-    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
-    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
-      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
         NewSU->isTwoAddress = true;
         break;
       }
     }
-    if (TID.isCommutable())
+    if (MCID.isCommutable())
       NewSU->isCommutable = true;
 
     InitNumRegDefsLeft(NewSU);
@@ -987,10 +1024,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
 /// FIXME: Move to SelectionDAG?
 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                  const TargetInstrInfo *TII) {
-  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
-  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
-  unsigned NumRes = TID.getNumDefs();
-  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+  unsigned NumRes = MCID.getNumDefs();
+  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
     if (Reg == *ImpDef)
       break;
     ++NumRes;
@@ -1055,7 +1092,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
 
         ++i; // Skip the ID value.
         if (InlineAsm::isRegDefKind(Flags) ||
-            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
+            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+            InlineAsm::isClobberKind(Flags)) {
           // Check for def of register or earlyclobber register.
           for (; NumVals; --NumVals, ++i) {
             unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -1070,10 +1108,10 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
 
     if (!Node->isMachineOpcode())
       continue;
-    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
-    if (!TID.ImplicitDefs)
+    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+    if (!MCID.ImplicitDefs)
       continue;
-    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
       CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
   }
 
@@ -1369,6 +1407,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
   bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
 };
 
+#ifndef NDEBUG
+/// reverse_sort - Wraps a sort functor and calls it with swapped operands.
+template<class SF>
+struct reverse_sort : public queue_sort {
+  SF &SortFunc;
+  reverse_sort(SF &Inner) : SortFunc(Inner) {}
+  reverse_sort(const reverse_sort &Other) : SortFunc(Other.SortFunc) {}
+  bool operator()(SUnit *LHS, SUnit *RHS) const {
+    // Swap the operands instead of negating SortFunc(LHS, RHS) so that
+    // different paths in the comparison logic get exercised.
+    return SortFunc(RHS, LHS);
+  }
+};
+#endif // NDEBUG
+
 /// bu_ls_rr_sort - Priority function for bottom up register pressure
 // reduction scheduler.
 struct bu_ls_rr_sort : public queue_sort {
@@ -1569,20 +1622,33 @@ protected:
 };
 
 template<class SF>
-class RegReductionPriorityQueue : public RegReductionPQBase {
-  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
-    std::vector<SUnit *>::iterator Best = Q.begin();
-    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
-           E = Q.end(); I != E; ++I)
-      if (Picker(*Best, *I))
-        Best = I;
-    SUnit *V = *Best;
-    if (Best != prior(Q.end()))
-      std::swap(*Best, Q.back());
-    Q.pop_back();
-    return V;
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+  // Linear scan for the winner: Picker(A, B) returning true means B beats A.
+  unsigned BestIdx = 0;
+  for (unsigned Idx = 1, End = Q.size(); Idx != End; ++Idx)
+    if (Picker(Q[BestIdx], Q[Idx]))
+      BestIdx = Idx;
+  SUnit *Winner = Q[BestIdx];
+  if (BestIdx + 1 != Q.size())
+    std::swap(Q[BestIdx], Q.back());
+  Q.pop_back();
+  return Winner;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+  if (DAG->StressSched) {
+    reverse_sort<SF> RPicker(Picker);
+    return popFromQueueImpl(Q, RPicker);
   }
+#endif
+  (void)DAG;
+  return popFromQueueImpl(Q, Picker);
+}
 
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
   SF Picker;
 
 public:
@@ -1603,7 +1669,7 @@ public:
   SUnit *pop() {
     if (Queue.empty()) return NULL;
 
-    SUnit *V = popFromQueue(Queue, Picker);
+    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
     V->NodeQueueId = 0;
     return V;
   }
@@ -1613,7 +1679,7 @@ public:
     std::vector<SUnit*> DumpQueue = Queue;
     SF DumpPicker = Picker;
     while (!DumpQueue.empty()) {
-      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
       if (isBottomUp())
         dbgs() << "Height " << SU->getHeight() << ": ";
       else
@@ -1778,9 +1844,9 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
     }
     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
          RegDefPos.IsValid(); RegDefPos.Advance()) {
-      EVT VT = RegDefPos.GetValue();
-      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-      unsigned Cost = TLI->getRepRegClassCostFor(VT);
+      unsigned RCId, Cost;
+      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+
       if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
         return true;
     }
@@ -1891,9 +1957,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
          RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
       if (SkipRegDefs)
         continue;
-      EVT VT = RegDefPos.GetValue();
-      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+
+      unsigned RCId, Cost;
+      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+      RegPressure[RCId] += Cost;
       break;
     }
   }
@@ -1906,16 +1973,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
        RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
     if (SkipRegDefs > 0)
       continue;
-    EVT VT = RegDefPos.GetValue();
-    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+    unsigned RCId, Cost;
+    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+    if (RegPressure[RCId] < Cost) {
       // Register pressure tracking is imprecise. This can happen. But we try
       // hard not to let it happen because it likely results in poor scheduling.
       DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
       RegPressure[RCId] = 0;
     }
     else {
-      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+      RegPressure[RCId] -= Cost;
     }
   }
   dumpRegPressure();
@@ -1962,13 +2029,9 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
     unsigned POpc = PN->getMachineOpcode();
     if (POpc == TargetOpcode::IMPLICIT_DEF)
       continue;
-    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
-      EVT VT = PN->getOperand(0).getValueType();
-      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
-      continue;
-    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
-               POpc == TargetOpcode::SUBREG_TO_REG) {
+    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+        POpc == TargetOpcode::INSERT_SUBREG ||
+        POpc == TargetOpcode::SUBREG_TO_REG) {
       EVT VT = PN->getValueType(0);
       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
@@ -2543,11 +2606,11 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
   if (SU->isTwoAddress) {
     unsigned Opc = SU->getNode()->getMachineOpcode();
-    const TargetInstrDesc &TID = TII->get(Opc);
-    unsigned NumRes = TID.getNumDefs();
-    unsigned NumOps = TID.getNumOperands() - NumRes;
+    const MCInstrDesc &MCID = TII->get(Opc);
+    unsigned NumRes = MCID.getNumDefs();
+    unsigned NumOps = MCID.getNumOperands() - NumRes;
     for (unsigned i = 0; i != NumOps; ++i) {
-      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
         SDNode *DU = SU->getNode()->getOperand(i).getNode();
         if (DU->getNodeId() != -1 &&
             Op->OrigNode == &(*SUnits)[DU->getNodeId()])
@@ -2727,11 +2790,11 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
 
     bool isLiveOut = hasOnlyLiveOutUses(SU);
     unsigned Opc = Node->getMachineOpcode();
-    const TargetInstrDesc &TID = TII->get(Opc);
-    unsigned NumRes = TID.getNumDefs();
-    unsigned NumOps = TID.getNumOperands() - NumRes;
+    const MCInstrDesc &MCID = TII->get(Opc);
+    unsigned NumRes = MCID.getNumDefs();
+    unsigned NumOps = MCID.getNumOperands() - NumRes;
     for (unsigned j = 0; j != NumOps; ++j) {
-      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
         continue;
       SDNode *DU = SU->getNode()->getOperand(j).getNode();
       if (DU->getNodeId() == -1)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 9f2f0121a86d..71f07d6fa47a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,11 +17,12 @@
 #include "ScheduleDAGSDNodes.h"
 #include "InstrEmitter.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -111,7 +112,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
 
   unsigned ResNo = User->getOperand(2).getResNo();
   if (Def->isMachineOpcode()) {
-    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
     if (ResNo >= II.getNumDefs() &&
         II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
       PhysReg = Reg;
@@ -255,8 +256,8 @@ void ScheduleDAGSDNodes::ClusterNodes() {
       continue;
 
     unsigned Opc = Node->getMachineOpcode();
-    const TargetInstrDesc &TID = TII->get(Opc);
-    if (TID.mayLoad())
+    const MCInstrDesc &MCID = TII->get(Opc);
+    if (MCID.mayLoad())
       // Cluster loads from "near" addresses into combined SUnits.
       ClusterNeighboringLoads(Node);
   }
@@ -378,7 +379,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
 }
 
 void ScheduleDAGSDNodes::AddSchedEdges() {
-  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
 
   // Check to see if the scheduler cares about latencies.
   bool UnitLatencies = ForceUnitLatencies();
@@ -390,14 +391,14 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
 
     if (MainNode->isMachineOpcode()) {
       unsigned Opc = MainNode->getMachineOpcode();
-      const TargetInstrDesc &TID = TII->get(Opc);
-      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
-        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+      const MCInstrDesc &MCID = TII->get(Opc);
+      for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+        if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
           SU->isTwoAddress = true;
           break;
         }
       }
-      if (TID.isCommutable())
+      if (MCID.isCommutable())
         SU->isCommutable = true;
     }
 
@@ -435,7 +436,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         // it requires a cross class copy (cost < 0). That means we are only
         // treating "expensive to copy" register dependency as physical register
         // dependency. This may change in the future though.
-        if (Cost >= 0)
+        if (Cost >= 0 && !StressSched)
           PhysReg = 0;
 
         // If this is a ctrl dep, latency is 1.
@@ -520,14 +521,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
     for (;DefIdx < NodeNumDefs; ++DefIdx) {
       if (!Node->hasAnyUseOfValue(DefIdx))
         continue;
-      if (Node->isMachineOpcode() &&
-          Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
-        // Propagate the incoming (full-register) type. I doubt it's needed.
-        ValueType = Node->getOperand(0).getValueType();
-      }
-      else {
-        ValueType = Node->getValueType(DefIdx);
-      }
+      ValueType = Node->getValueType(DefIdx);
       ++DefIdx;
       return; // Found a normal regdef.
     }
@@ -649,7 +643,7 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
   // order number right after the N.
   MachineBasicBlock *BB = Emitter.getBlock();
   MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
-  SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
+  ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
   for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
     if (DVs[i]->isInvalidated())
       continue;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index b5f68f3055cf..9c27b2ea02ec 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -135,6 +135,14 @@ namespace llvm {
         return ValueType;
       }
 
+      /// GetNode - Accessor for the iterator's Node member, i.e. the SDNode
+      /// whose result values Advance() is currently scanning.
+      const SDNode *GetNode() const { return Node; }
+
+      /// GetIdx - Result number of the current def.  Advance() increments
+      /// DefIdx right after recording a def, hence the minus one.
+      unsigned GetIdx() const { return DefIdx-1; }
+
       void Advance();
     private:
       void InitNodeNumDefs();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68eeb609d401..35ea0bb940b5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -598,7 +598,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
   Ordering->remove(N);
 
   // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
-  SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+  ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
   for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
     DbgVals[i]->setIsInvalidated();
 }
@@ -3326,13 +3326,13 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                      const TargetLowering &TLI) {
   assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
          "Expecting memcpy / memset source to meet alignment requirement!");
-  // If 'SrcAlign' is zero, that means the memory operation does not need load
-  // the value, i.e. memset or memcpy from constant string. Otherwise, it's
-  // the inferred alignment of the source. 'DstAlign', on the other hand, is the
-  // specified alignment of the memory operation. If it is zero, that means
-  // it's possible to change the alignment of the destination. 'MemcpyStrSrc'
-  // indicates whether the memcpy source is constant so it does not need to be
-  // loaded.
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+  // not need to be loaded.
   EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
                                    NonScalarIntSafe, MemcpyStrSrc,
                                    DAG.getMachineFunction());
@@ -4037,6 +4037,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
                       MachinePointerInfo PtrInfo, EVT MemVT,
                       bool isVolatile, bool isNonTemporal,
                       unsigned Alignment, const MDNode *TBAAInfo) {
+  assert(Chain.getValueType() == MVT::Other && 
+        "Invalid chain type");
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(VT);
 
@@ -4142,6 +4144,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                SDValue Ptr, MachinePointerInfo PtrInfo,
                                bool isVolatile, bool isNonTemporal,
                                unsigned Alignment, const MDNode *TBAAInfo) {
+  assert(Chain.getValueType() == MVT::Other && 
+        "Invalid chain type");
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(Val.getValueType());
 
@@ -4165,6 +4169,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
 
 SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                SDValue Ptr, MachineMemOperand *MMO) {
+  assert(Chain.getValueType() == MVT::Other && 
+        "Invalid chain type");
   EVT VT = Val.getValueType();
   SDVTList VTs = getVTList(MVT::Other);
   SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -4191,6 +4197,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                     EVT SVT,bool isVolatile, bool isNonTemporal,
                                     unsigned Alignment,
                                     const MDNode *TBAAInfo) {
+  assert(Chain.getValueType() == MVT::Other && 
+        "Invalid chain type");
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(SVT);
 
@@ -4216,6 +4224,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
                                     MachineMemOperand *MMO) {
   EVT VT = Val.getValueType();
 
+  assert(Chain.getValueType() == MVT::Other && 
+        "Invalid chain type");
   if (VT == SVT)
     return getStore(Chain, dl, Val, Ptr, MMO);
 
@@ -5508,9 +5518,9 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
     return;
   SDNode *FromNode = From.getNode();
   SDNode *ToNode = To.getNode();
-  SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+  ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
   SmallVector<SDDbgValue *, 2> ClonedDVs;
-  for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+  for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
        I != E; ++I) {
     SDDbgValue *Dbg = *I;
     if (Dbg->getKind() == SDDbgValue::SDNODE) {
@@ -5691,24 +5701,39 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
   return false;
 }
 
-/// isPredecessorOf - Return true if this node is a predecessor of N. This node
-/// is either an operand of N or it can be reached by traversing up the operands.
-/// NOTE: this is an expensive method. Use it carefully.
-bool SDNode::isPredecessorOf(SDNode *N) const {
-  SmallPtrSet<SDNode *, 32> Visited;
-  SmallVector<SDNode *, 16> Worklist;
-  Worklist.push_back(N);
+/// hasPredecessor - Return true if N can be reached from this node by
+/// walking operand edges upward, i.e. N is a direct operand or an operand
+/// of some transitive operand.  Runs a fresh search with empty state.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+  SmallVector<const SDNode *, 16> Pending;
+  SmallPtrSet<const SDNode *, 32> Seen;
+  return hasPredecessorHelper(N, Seen, Pending);
+}
 
-  do {
-    N = Worklist.pop_back_val();
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-      SDNode *Op = N->getOperand(i).getNode();
-      if (Op == this)
-        return true;
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+                                  SmallPtrSet<const SDNode *, 32> &Visited,
+                                  SmallVector<const SDNode *, 16> &Worklist) const {
+  if (Visited.empty()) {
+    Worklist.push_back(this);
+  } else {
+    // Take a look in the visited set. If we've already encountered this node
+    // we needn't search further.
+    if (Visited.count(N))
+      return true;
+  }
+
+  // Haven't visited N yet. Continue the search.
+  while (!Worklist.empty()) {
+    const SDNode *M = Worklist.pop_back_val();
+    for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+      SDNode *Op = M->getOperand(i).getNode();
       if (Visited.insert(Op))
         Worklist.push_back(Op);
+      if (Op == N)
+        return true;
     }
-  } while (!Worklist.empty());
+  }
 
   return false;
 }
@@ -5863,6 +5888,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FSUB:   return "fsub";
   case ISD::FMUL:   return "fmul";
   case ISD::FDIV:   return "fdiv";
+  case ISD::FMA:    return "fma";
   case ISD::FREM:   return "frem";
   case ISD::FCOPYSIGN: return "fcopysign";
   case ISD::FGETSIGN:  return "fgetsign";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7a8a975d0294..81b03ee76a5c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -286,22 +286,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
     assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
       "Cannot handle this kind of promotion");
     // Promoted vector extract
-    unsigned NumElts = ValueVT.getVectorNumElements();
-    SmallVector<SDValue, 8> NewOps;
-    for (unsigned i = 0; i < NumElts; ++i) {
-      SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-        PartVT.getScalarType(), Val ,DAG.getIntPtrConstant(i));
-      SDValue Cast;
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT, Val);
 
-      bool Smaller = ValueVT.bitsLE(PartVT);
-
-      Cast = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
-                         DL, ValueVT.getScalarType(), Ext);
-
-      NewOps.push_back(Cast);
-    }
-    return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT,
-      &NewOps[0], NewOps.size());
   }
 
   // Trivial bitcast if the types are the same size and the destination
@@ -310,9 +298,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
       TLI.isTypeLegal(ValueVT))
     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 
-  assert(ValueVT.getVectorElementType() == PartVT &&
-         ValueVT.getVectorNumElements() == 1 &&
+  // Handle cases such as i8 -> <1 x i1>
+  assert(ValueVT.getVectorNumElements() == 1 &&
          "Only trivial scalar-to-vector conversions should get here!");
+
+  if (ValueVT.getVectorNumElements() == 1 &&
+      ValueVT.getVectorElementType() != PartVT) {
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT.getScalarType(), Val);
+  }
+
   return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
 }
 
@@ -453,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
       // Bitconvert vector->vector case.
       Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
     } else if (PartVT.isVector() &&
-               PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+               PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
                PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
       EVT ElementVT = PartVT.getVectorElementType();
       // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
@@ -475,28 +471,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
       //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
     } else if (PartVT.isVector() &&
                PartVT.getVectorElementType().bitsGE(
-                 ValueVT.getVectorElementType())&&
+                 ValueVT.getVectorElementType()) &&
                PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
 
       // Promoted vector extract
-      unsigned NumElts = ValueVT.getVectorNumElements();
-      SmallVector<SDValue, 8> NewOps;
-      for (unsigned i = 0; i < NumElts; ++i) {
-        SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-                       ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i));
-        SDValue Cast = DAG.getNode(ISD::ANY_EXTEND,
-                       DL, PartVT.getScalarType(), Ext);
-        NewOps.push_back(Cast);
-      }
-      Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT,
-                        &NewOps[0], NewOps.size());
+      bool Smaller = PartVT.bitsLE(ValueVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                        DL, PartVT, Val);
     } else{
       // Vector -> scalar conversion.
-      assert(ValueVT.getVectorElementType() == PartVT &&
-             ValueVT.getVectorNumElements() == 1 &&
+      assert(ValueVT.getVectorNumElements() == 1 &&
              "Only trivial vector-to-scalar conversions should get here!");
       Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                         PartVT, Val, DAG.getIntPtrConstant(0));
+
+      bool Smaller = ValueVT.bitsLE(PartVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                         DL, PartVT, Val);
     }
 
     Parts[0] = Val;
@@ -1280,6 +1271,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
   return true;
 }
 
+/// Return BPI's edge weight for the IR blocks behind Src and Dst; 0 if no BPI.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+                                            MachineBasicBlock *Dst) {
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  if (!BPI)  // No branch probability analysis attached to FuncInfo.
+    return 0;
+  BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock());
+  BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock());
+  return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
+                                                 MachineBasicBlock *Dst) {
+  uint32_t weight = getEdgeWeight(Src, Dst);  // 0 when BPI is unavailable.
+  Src->addSuccessor(Dst, weight);  // Record the CFG edge with that weight.
+}
+
+
 static bool InBlock(const Value *V, const BasicBlock *BB) {
   if (const Instruction *I = dyn_cast<Instruction>(V))
     return I->getParent() == BB;
@@ -1549,8 +1558,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
   }
 
   // Update successor info
-  SwitchBB->addSuccessor(CB.TrueBB);
-  SwitchBB->addSuccessor(CB.FalseBB);
+  addSuccessorWithWeight(SwitchBB, CB.TrueBB);
+  addSuccessorWithWeight(SwitchBB, CB.FalseBB);
 
   // Set NextBlock to be the MBB immediately after the current one, if any.
   // This is used to avoid emitting unnecessary branches to the next block.
@@ -1694,8 +1703,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
 
   MachineBasicBlock* MBB = B.Cases[0].ThisBB;
 
-  SwitchBB->addSuccessor(B.Default);
-  SwitchBB->addSuccessor(MBB);
+  addSuccessorWithWeight(SwitchBB, B.Default);
+  addSuccessorWithWeight(SwitchBB, MBB);
 
   SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                 MVT::Other, CopyTo, RangeCmp,
@@ -1718,7 +1727,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
   SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
                                        Reg, VT);
   SDValue Cmp;
-  if (CountPopulation_64(B.Mask) == 1) {
+  unsigned PopCount = CountPopulation_64(B.Mask);
+  if (PopCount == 1) {
     // Testing for a single bit; just compare the shift count with what it
     // would need to be to shift a 1 bit in that position.
     Cmp = DAG.getSetCC(getCurDebugLoc(),
@@ -1726,6 +1736,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                        ShiftOp,
                        DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
                        ISD::SETEQ);
+  } else if (PopCount == BB.Range) {
+    // There is only one zero bit in the range, test for it directly.
+    Cmp = DAG.getSetCC(getCurDebugLoc(),
+                       TLI.getSetCCResultType(VT),
+                       ShiftOp,
+                       DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+                       ISD::SETNE);
   } else {
     // Make desired shift
     SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
@@ -1740,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                        ISD::SETNE);
   }
 
-  SwitchBB->addSuccessor(B.TargetBB);
-  SwitchBB->addSuccessor(NextMBB);
+  addSuccessorWithWeight(SwitchBB, B.TargetBB);
+  addSuccessorWithWeight(SwitchBB, NextMBB);
 
   SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                               MVT::Other, getControlRoot(),
@@ -1981,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
   // table.
   MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
   CurMF->insert(BBI, JumpTableBB);
-  CR.CaseBB->addSuccessor(Default);
-  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  addSuccessorWithWeight(CR.CaseBB, Default);
+  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
 
   // Build a vector of destination BBs, corresponding to each target
   // of the jump table. If the value of the jump table slot corresponds to
@@ -2009,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
          E = DestBBs.end(); I != E; ++I) {
     if (!SuccsHandled[(*I)->getNumber()]) {
       SuccsHandled[(*I)->getNumber()] = true;
-      JumpTableBB->addSuccessor(*I);
+      addSuccessorWithWeight(JumpTableBB, *I);
     }
   }
 
@@ -2428,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
     succs.push_back(I.getSuccessor(i));
   array_pod_sort(succs.begin(), succs.end());
   succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
-  for (unsigned i = 0, e = succs.size(); i != e; ++i)
-    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+    addSuccessorWithWeight(IndirectBrMBB, Succ);
+  }
 
   DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
                           MVT::Other, getControlRoot(),
@@ -2489,6 +2509,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
                            Op1.getValueType(), Op1, Op2));
 }
 
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+  SDValue Op1 = getValue(I.getOperand(0));  // Dividend.
+  SDValue Op2 = getValue(I.getOperand(1));  // Divisor.
+
+  // Turn exact SDivs by a non-zero constant into multiplications.
+  // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+  // exact bit.
+  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+      !isa<ConstantSDNode>(Op1) &&  // Constant dividends keep the plain SDIV.
+      isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+  else  // Not exact, or the operands don't fit the pattern above.
+    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+                             Op1, Op2));
+}
+
 void SelectionDAGBuilder::visitICmp(const User &I) {
   ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
   if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
@@ -2855,7 +2891,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
   bool IntoUndef = isa<UndefValue>(Op0);
   bool FromUndef = isa<UndefValue>(Op1);
 
-  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
 
   SmallVector<EVT, 4> AggValueVTs;
   ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2895,7 +2931,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
   const Type *ValTy = I.getType();
   bool OutOfUndef = isa<UndefValue>(Op0);
 
-  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
 
   SmallVector<EVT, 4> ValValueVTs;
   ComputeValueVTs(TLI, ValTy, ValValueVTs);
@@ -4623,6 +4659,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::pow:
     visitPow(I);
     return 0;
+  case Intrinsic::fma:
+    setValue(&I, DAG.getNode(ISD::FMA, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)),
+                             getValue(I.getArgOperand(2))));
+    return 0;
   case Intrinsic::convert_to_fp16:
     setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
                              MVT::i16, getValue(I.getArgOperand(0))));
@@ -4759,6 +4802,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::flt_rounds:
     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
     return 0;
+
+  case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with EXP.
+    setValue(&I, getValue(I.getArgOperand(0)));
+    return 0;
+  }
+
   case Intrinsic::trap: {
     StringRef TrapFuncName = getTrapFunctionName();
     if (TrapFuncName.empty()) {
@@ -4789,15 +4839,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return implVisitAluOverflow(I, ISD::SMULO);
 
   case Intrinsic::prefetch: {
-    SDValue Ops[4];
+    SDValue Ops[5];
     unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
     Ops[0] = getRoot();
     Ops[1] = getValue(I.getArgOperand(0));
     Ops[2] = getValue(I.getArgOperand(1));
     Ops[3] = getValue(I.getArgOperand(2));
+    Ops[4] = getValue(I.getArgOperand(3));
     DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
                                         DAG.getVTList(MVT::Other),
-                                        &Ops[0], 4,
+                                        &Ops[0], 5,
                                         EVT::getIntegerVT(*Context, 8),
                                         MachinePointerInfo(I.getArgOperand(0)),
                                         0, /* align */
@@ -5415,54 +5466,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
 
 } // end anonymous namespace
 
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register.  Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
-                      const TargetLowering &TLI,
-                      const TargetRegisterInfo *TRI) {
-  EVT FoundVT = MVT::Other;
-  const TargetRegisterClass *FoundRC = 0;
-  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
-       E = TRI->regclass_end(); RCI != E; ++RCI) {
-    EVT ThisVT = MVT::Other;
-
-    const TargetRegisterClass *RC = *RCI;
-    // If none of the value types for this register class are valid, we
-    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
-    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
-         I != E; ++I) {
-      if (TLI.isTypeLegal(*I)) {
-        // If we have already found this register in a different register class,
-        // choose the one with the largest VT specified.  For example, on
-        // PowerPC, we favor f64 register classes over f32.
-        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
-          ThisVT = *I;
-          break;
-        }
-      }
-    }
-
-    if (ThisVT == MVT::Other) continue;
-
-    // NOTE: This isn't ideal.  In particular, this might allocate the
-    // frame pointer in functions that need it (due to them not being taken
-    // out of allocation, because a variable sized allocation hasn't been seen
-    // yet).  This is a slight code pessimization, but should still work.
-    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-      if (*I == Reg) {
-        // We found a matching register class.  Keep looking at others in case
-        // we find one with larger registers that this physreg is also in.
-        FoundRC = RC;
-        FoundVT = ThisVT;
-        break;
-      }
-  }
-  return FoundRC;
-}
-
 /// GetRegistersForValue - Assign registers (virtual or physical) for the
 /// specified operand.  We prefer to assign virtual registers, to allow the
 /// register allocator to handle the assignment process.  However, if the asm
@@ -5597,52 +5600,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
     return;
   }
 
-  // This is a reference to a register class that doesn't directly correspond
-  // to an LLVM register class.  Allocate NumRegs consecutive, available,
-  // registers from the class.
-  std::vector<unsigned> RegClassRegs
-    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
-                                            OpInfo.ConstraintVT);
-
-  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-  unsigned NumAllocated = 0;
-  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
-    unsigned Reg = RegClassRegs[i];
-    // See if this register is available.
-    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
-        (isInReg  && InputRegs.count(Reg))) {    // Already used.
-      // Make sure we find consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Check to see if this register is allocatable (i.e. don't give out the
-    // stack pointer).
-    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
-    if (!RC) {        // Couldn't allocate this register.
-      // Reset NumAllocated to make sure we return consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Okay, this register is good, we can use it.
-    ++NumAllocated;
-
-    // If we allocated enough consecutive registers, succeed.
-    if (NumAllocated == NumRegs) {
-      unsigned RegStart = (i-NumAllocated)+1;
-      unsigned RegEnd   = i+1;
-      // Mark all of the allocated registers used.
-      for (unsigned i = RegStart; i != RegEnd; ++i)
-        Regs.push_back(RegClassRegs[i]);
-
-      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
-                                         OpInfo.ConstraintVT);
-      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
-      return;
-    }
-  }
-
   // Otherwise, we couldn't allocate enough registers for this.
 }
 
@@ -5749,10 +5706,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
 
       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+	std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+	  TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+	std::pair<unsigned, const TargetRegisterClass*> InputRC =
+	  TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
         if ((OpInfo.ConstraintVT.isInteger() !=
              Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
+            (MatchRC.second != InputRC.second)) {
           report_fatal_error("Unsupported asm: input constraint"
                              " with a matching output constraint of"
                              " incompatible type!");
@@ -6015,8 +5975,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
              "Don't know how to handle indirect register inputs yet!");
 
       // Copy the input into the appropriate registers.
-      if (OpInfo.AssignedRegs.Regs.empty() ||
-          !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
+      if (OpInfo.AssignedRegs.Regs.empty())
         report_fatal_error("Couldn't allocate input reg for constraint '" +
                            Twine(OpInfo.ConstraintCode) + "'!");
 
@@ -6031,8 +5990,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       // Add the clobbered value to the operand list, so that the register
       // allocator is aware that the physreg got clobbered.
       if (!OpInfo.AssignedRegs.Regs.empty())
-        OpInfo.AssignedRegs.AddInlineAsmOperands(
-                                            InlineAsm::Kind_RegDefEarlyClobber,
+        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                  false, 0, DAG,
                                                  AsmNodeOperands);
       break;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8376d41e1531..a0884ebf5d56 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -434,6 +434,9 @@ private:
                                 const Value* SV,
                                 MachineBasicBlock* Default,
                                 MachineBasicBlock *SwitchBB);
+
+  uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+  void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
 public:
   void visitSwitchCase(CaseBlock &CB,
                        MachineBasicBlock *SwitchBB);
@@ -464,7 +467,7 @@ private:
   void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
   void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
   void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
-  void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+  void visitSDiv(const User &I);
   void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
   void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
   void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 771b0089fdc0..87bb296b8c79 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
@@ -68,6 +69,11 @@ static cl::opt<bool>
 EnableFastISelAbort("fast-isel-abort", cl::Hidden,
           cl::desc("Enable abort calls when \"fast\" instruction fails"));
 
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+        cl::desc("use Machine Branch Probability Info"),
+        cl::init(true), cl::Hidden);
+
 #ifndef NDEBUG
 static cl::opt<bool>
 ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
@@ -186,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
   DAGSize(0) {
     initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
     initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+    initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
   }
 
 SelectionDAGISel::~SelectionDAGISel() {
@@ -199,6 +206,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<AliasAnalysis>();
   AU.addRequired<GCModuleInfo>();
   AU.addPreserved<GCModuleInfo>();
+  if (UseMBPI && OptLevel != CodeGenOpt::None)
+    AU.addRequired<BranchProbabilityInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -262,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
 
   CurDAG->init(*MF);
   FuncInfo->set(Fn, *MF);
+
+  if (UseMBPI && OptLevel != CodeGenOpt::None)
+    FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+  else
+    FuncInfo->BPI = 0;
+
   SDB->init(GFI, *AA);
 
   SelectAllBasicBlocks(Fn);
@@ -339,9 +354,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
       const MachineBasicBlock *MBB = I;
       for (MachineBasicBlock::const_iterator
              II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
-        const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+        const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
 
-        if ((TID.isCall() && !TID.isReturn()) ||
+        if ((MCID.isCall() && !MCID.isReturn()) ||
             II->isStackAligningInlineAsm()) {
           MFI->setHasCalls(true);
           goto done;
@@ -666,7 +681,7 @@ void SelectionDAGISel::PrepareEHLandingPad() {
   // landing pad can thus be detected via the MachineModuleInfo.
   MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
 
-  const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+  const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
   BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
     .addSym(Label);
 
@@ -2596,9 +2611,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (EmitNodeInfo & OPFL_MemRefs) {
         // Only attach load or store memory operands if the generated
         // instruction may load or store.
-        const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc);
-        bool mayLoad = TID.mayLoad();
-        bool mayStore = TID.mayStore();
+        const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+        bool mayLoad = MCID.mayLoad();
+        bool mayStore = MCID.mayStore();
 
         unsigned NumMemRefs = 0;
         for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cf6069a2f185..2626ac3bbb2a 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::MUL_I32] = "__mulsi3";
   Names[RTLIB::MUL_I64] = "__muldi3";
   Names[RTLIB::MUL_I128] = "__multi3";
+  Names[RTLIB::MULO_I32] = "__mulosi4";
+  Names[RTLIB::MULO_I64] = "__mulodi4";
+  Names[RTLIB::MULO_I128] = "__muloti4";
   Names[RTLIB::SDIV_I8] = "__divqi3";
   Names[RTLIB::SDIV_I16] = "__divhi3";
   Names[RTLIB::SDIV_I32] = "__divsi3";
@@ -136,6 +139,10 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::REM_F64] = "fmod";
   Names[RTLIB::REM_F80] = "fmodl";
   Names[RTLIB::REM_PPCF128] = "fmodl";
+  Names[RTLIB::FMA_F32] = "fmaf";
+  Names[RTLIB::FMA_F64] = "fma";
+  Names[RTLIB::FMA_F80] = "fmal";
+  Names[RTLIB::FMA_PPCF128] = "fmal";
   Names[RTLIB::POWI_F32] = "__powisf2";
   Names[RTLIB::POWI_F64] = "__powidf2";
   Names[RTLIB::POWI_F80] = "__powixf2";
@@ -673,10 +680,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
     NewVT = EltTy;
   IntermediateVT = NewVT;
 
+  unsigned NewVTSize = NewVT.getSizeInBits();
+
+  // Convert sizes such as i33 to i64.
+  if (!isPowerOf2_32(NewVTSize))
+    NewVTSize = NextPowerOf2(NewVTSize);
+
   EVT DestVT = TLI->getRegisterType(NewVT);
   RegisterVT = DestVT;
   if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
-    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
 
   // Otherwise, promotion or legal types use the same number of registers as
   // the vector decimated to the appropriate level.
@@ -965,8 +978,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
 
   EVT DestVT = getRegisterType(Context, NewVT);
   RegisterVT = DestVT;
+  unsigned NewVTSize = NewVT.getSizeInBits();
+
+  // Convert sizes such as i33 to i64.
+  if (!isPowerOf2_32(NewVTSize))
+    NewVTSize = NextPowerOf2(NewVTSize);
+
   if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
-    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
 
   // Otherwise, promotion or legal types use the same number of registers as
   // the vector decimated to the appropriate level.
@@ -1762,9 +1781,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   case ISD::BITCAST:
     // If this is an FP->Int bitcast and if the sign bit is the only
     // thing demanded, turn this into a FGETSIGN.
-    if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
-        Op.getOperand(0).getValueType().isFloatingPoint() &&
-        !Op.getOperand(0).getValueType().isVector()) {
+    if (!Op.getOperand(0).getValueType().isVector() &&
+        NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+        Op.getOperand(0).getValueType().isFloatingPoint()) {
       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
       bool i32Legal  = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
       if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
@@ -1902,7 +1921,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   // comparisons.
   if (isa<ConstantSDNode>(N0.getNode()))
     return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
-  
+
   if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
 
@@ -2608,7 +2627,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
 
 TargetLowering::ConstraintType
 TargetLowering::getConstraintType(const std::string &Constraint) const {
-  // FIXME: lots more standard ones to handle.
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     default: break;
@@ -2661,9 +2679,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                   std::string &Constraint,
                                                   std::vector<SDValue> &Ops,
                                                   SelectionDAG &DAG) const {
-  
+
   if (Constraint.length() > 1) return;
-  
+
   char ConstraintLetter = Constraint[0];
   switch (ConstraintLetter) {
   default: break;
@@ -2722,13 +2740,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   }
 }
 
-std::vector<unsigned> TargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const {
-  return std::vector<unsigned>();
-}
-
-
 std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
 getRegForInlineAsmConstraint(const std::string &Constraint,
                              EVT VT) const {
@@ -2853,7 +2864,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
           report_fatal_error("Indirect operand for inline asm not a pointer!");
         OpTy = PtrTy->getElementType();
       }
-      
+
       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
       if (const StructType *STy = dyn_cast<StructType>(OpTy))
         if (STy->getNumElements() == 1)
@@ -2955,10 +2966,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
       AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
 
       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+	std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+	  getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+	std::pair<unsigned, const TargetRegisterClass*> InputRC =
+	  getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
         if ((OpInfo.ConstraintVT.isInteger() !=
              Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
+            (MatchRC.second != InputRC.second)) {
           report_fatal_error("Unsupported asm: input constraint"
                              " with a matching output constraint of"
                              " incompatible type!");
@@ -3204,6 +3218,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant Op2 (must be non-zero).
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+                                       SelectionDAG &DAG) const {
+  ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+  APInt d = C->getAPIntValue();
+  assert(d != 0 && "Division by zero!");
+
+  // For an even divisor, pre-shift both operands so the divisor's LSB is one.
+  unsigned ShAmt = d.countTrailingZeros();
+  if (ShAmt) {
+    // TODO: For UDIV use SRL instead of SRA.
+    SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+    Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+    d = d.ashr(ShAmt);
+  }
+
+  // Compute d's multiplicative inverse mod 2^BitWidth by Newton's method.
+  APInt t, xn = d;
+  while ((t = d*xn) != 1)  // Stop once d * xn == 1 in this bit width.
+    xn *= APInt(d.getBitWidth(), 2) - t;  // xn' = xn * (2 - d*xn)
+
+  Op2 = DAG.getConstant(xn, Op1.getValueType());
+  return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 6ab0cb03c065..5a253a4d97e4 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -45,7 +45,8 @@ namespace {
 
     /// StackEntryTy - Abstract type of a link in the shadow stack.
     ///
-    const StructType *StackEntryTy;
+    StructType *StackEntryTy;
+    StructType *FrameMapTy;
 
     /// Roots - GC roots in the current function. Each is a pair of the
     /// intrinsic call and its corresponding alloca.
@@ -164,8 +165,7 @@ namespace {
 
           InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
                                               NewBB, CleanupBB,
-                                              Args.begin(), Args.end(),
-                                              CI->getName(), CallBB);
+                                              Args, CI->getName(), CallBB);
           II->setCallingConv(CI->getCallingConv());
           II->setAttributes(CI->getAttributes());
           CI->replaceAllUsesWith(II);
@@ -194,31 +194,31 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
 
   // Truncate the ShadowStackDescriptor if some metadata is null.
   unsigned NumMeta = 0;
-  SmallVector<Constant*,16> Metadata;
+  SmallVector<Constant*, 16> Metadata;
   for (unsigned I = 0; I != Roots.size(); ++I) {
     Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
     if (!C->isNullValue())
       NumMeta = I + 1;
     Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
   }
+  Metadata.resize(NumMeta);
 
+  const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+  
   Constant *BaseElts[] = {
-    ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false),
-    ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false),
+    ConstantInt::get(Int32Ty, Roots.size(), false),
+    ConstantInt::get(Int32Ty, NumMeta, false),
   };
 
   Constant *DescriptorElts[] = {
-    ConstantStruct::get(F.getContext(), BaseElts, 2, false),
-    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
-                       Metadata.begin(), NumMeta)
+    ConstantStruct::get(FrameMapTy, BaseElts),
+    ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
   };
 
-  Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2,
-                                           false);
-
-  std::string TypeName("gc_map.");
-  TypeName += utostr(NumMeta);
-  F.getParent()->addTypeName(TypeName, FrameMap->getType());
+  Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
+  StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys);
+  
+  Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
 
   // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
   //        that, short of multithreaded LLVM, it should be safe; all that is
@@ -246,17 +246,12 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
 
 const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
   // doInitialization creates the generic version of this type.
-  std::vector<const Type*> EltTys;
+  std::vector<Type*> EltTys;
   EltTys.push_back(StackEntryTy);
   for (size_t I = 0; I != Roots.size(); I++)
     EltTys.push_back(Roots[I].second->getAllocatedType());
-  Type *Ty = StructType::get(F.getContext(), EltTys);
-
-  std::string TypeName("gc_stackentry.");
-  TypeName += F.getName();
-  F.getParent()->addTypeName(TypeName, Ty);
-
-  return Ty;
+  
+  return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys);
 }
 
 /// doInitialization - If this module uses the GC intrinsics, find them now. If
@@ -267,13 +262,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
   //   int32_t NumMeta;  // Number of metadata descriptors. May be < NumRoots.
   //   void *Meta[];     // May be absent for roots without metadata.
   // };
-  std::vector<const Type*> EltTys;
+  std::vector<Type*> EltTys;
   // 32 bits is ok up to a 32GB stack frame. :)
   EltTys.push_back(Type::getInt32Ty(M.getContext()));
   // Specifies length of variable length array. 
   EltTys.push_back(Type::getInt32Ty(M.getContext()));
-  StructType *FrameMapTy = StructType::get(M.getContext(), EltTys);
-  M.addTypeName("gc_map", FrameMapTy);
+  FrameMapTy = StructType::createNamed("gc_map", EltTys);
   PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
 
   // struct StackEntry {
@@ -281,18 +275,14 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
   //   FrameMap *Map;          // Pointer to constant FrameMap.
   //   void *Roots[];          // Stack roots (in-place array, so we pretend).
   // };
-  OpaqueType *RecursiveTy = OpaqueType::get(M.getContext());
-
+  
+  StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry");
+  
   EltTys.clear();
-  EltTys.push_back(PointerType::getUnqual(RecursiveTy));
+  EltTys.push_back(PointerType::getUnqual(StackEntryTy));
   EltTys.push_back(FrameMapPtrTy);
-  PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys);
-
-  RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
-  StackEntryTy = cast<StructType>(LinkTyH.get());
+  StackEntryTy->setBody(EltTys);
   const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
-  M.addTypeName("gc_stackentry", LinkTyH.get());  // FIXME: Is this safe from
-                                                  //        a FunctionPass?
 
   // Get the root chain if it already exists.
   Head = M.getGlobalVariable("llvm_gc_root_chain");
@@ -399,7 +389,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) {
   Instruction *CurrentHead  = AtEntry.CreateLoad(Head, "gc_currhead");
   Instruction *EntryMapPtr  = CreateGEP(Context, AtEntry, StackEntry,
                                         0,1,"gc_frame.map");
-                              AtEntry.CreateStore(FrameMap, EntryMapPtr);
+  AtEntry.CreateStore(FrameMap, EntryMapPtr);
 
   // After all the allocas...
   for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
deleted file mode 100644
index 221bec50d850..000000000000
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ /dev/null
@@ -1,1539 +0,0 @@
-//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple register coalescing pass that attempts to
-// aggressively coalesce every register copy that it can.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regcoalescing"
-#include "SimpleRegisterCoalescing.h"
-#include "VirtRegMap.h"
-#include "LiveDebugVariables.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Value.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-#include <cmath>
-using namespace llvm;
-
-STATISTIC(numJoins    , "Number of interval joins performed");
-STATISTIC(numCrossRCs , "Number of cross class joins performed");
-STATISTIC(numCommutes , "Number of instruction commuting performed");
-STATISTIC(numExtends  , "Number of copies extended");
-STATISTIC(NumReMats   , "Number of instructions re-materialized");
-STATISTIC(numPeep     , "Number of identity moves eliminated after coalescing");
-STATISTIC(numAborts   , "Number of times interval joining aborted");
-
-char SimpleRegisterCoalescing::ID = 0;
-static cl::opt<bool>
-EnableJoining("join-liveintervals",
-              cl::desc("Coalesce copies (default=true)"),
-              cl::init(true));
-
-static cl::opt<bool>
-DisableCrossClassJoin("disable-cross-class-join",
-               cl::desc("Avoid coalescing cross register class copies"),
-               cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-EnablePhysicalJoin("join-physregs",
-                   cl::desc("Join physical register copies"),
-                   cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-VerifyCoalescing("verify-coalescing",
-         cl::desc("Verify machine instrs before and after register coalescing"),
-         cl::Hidden);
-
-INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer,
-                "simple-register-coalescing", "Simple Register Coalescing", 
-                false, false, true)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer,
-                "simple-register-coalescing", "Simple Register Coalescing", 
-                false, false, true)
-
-char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
-
-void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-  AU.addRequired<AliasAnalysis>();
-  AU.addRequired<LiveIntervals>();
-  AU.addPreserved<LiveIntervals>();
-  AU.addRequired<LiveDebugVariables>();
-  AU.addPreserved<LiveDebugVariables>();
-  AU.addPreserved<SlotIndexes>();
-  AU.addRequired<MachineLoopInfo>();
-  AU.addPreserved<MachineLoopInfo>();
-  AU.addPreservedID(MachineDominatorsID);
-  AU.addPreservedID(StrongPHIEliminationID);
-  AU.addPreservedID(PHIEliminationID);
-  AU.addPreservedID(TwoAddressInstructionPassID);
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-void SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) {
-  /// Joined copies are not deleted immediately, but kept in JoinedCopies.
-  JoinedCopies.insert(CopyMI);
-
-  /// Mark all register operands of CopyMI as <undef> so they won't affect dead
-  /// code elimination.
-  for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
-       E = CopyMI->operands_end(); I != E; ++I)
-    if (I->isReg())
-      I->setIsUndef(true);
-}
-
-/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB.  If the source value number (in IntA) is defined by a copy from B,
-/// see if we can merge these two pieces of B into a single value number,
-/// eliminating a copy.  For example:
-///
-///  A3 = B0
-///    ...
-///  B1 = A3      <- this copy
-///
-/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
-/// value number to be replaced with B0 (which simplifies the B liveinterval).
-///
-/// This returns true if an interval was modified.
-///
-bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
-                                                    MachineInstr *CopyMI) {
-  // Bail if there is no dst interval - can happen when merging physical subreg
-  // operations.
-  if (!li_->hasInterval(CP.getDstReg()))
-    return false;
-
-  LiveInterval &IntA =
-    li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
-  LiveInterval &IntB =
-    li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
-  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
-
-  // BValNo is a value number in B that is defined by a copy from A.  'B3' in
-  // the example above.
-  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
-  if (BLR == IntB.end()) return false;
-  VNInfo *BValNo = BLR->valno;
-
-  // Get the location that B is defined at.  Two options: either this value has
-  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
-  // can't process it.
-  if (!BValNo->isDefByCopy()) return false;
-  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
-  // AValNo is the value number in A that defines the copy, A3 in the example.
-  SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
-  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
-  // The live range might not exist after fun with physreg coalescing.
-  if (ALR == IntA.end()) return false;
-  VNInfo *AValNo = ALR->valno;
-  // If it's re-defined by an early clobber somewhere in the live range, then
-  // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
-  // See PR3149:
-  // 172     %ECX<def> = MOV32rr %reg1039<kill>
-  // 180     INLINEASM <es:subl $5,$1
-  //         sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
-  //         %EAX<kill>,
-  // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
-  // 188     %EAX<def> = MOV32rr %EAX<kill>
-  // 196     %ECX<def> = MOV32rr %ECX<kill>
-  // 204     %ECX<def> = MOV32rr %ECX<kill>
-  // 212     %EAX<def> = MOV32rr %EAX<kill>
-  // 220     %EAX<def> = MOV32rr %EAX
-  // 228     %reg1039<def> = MOV32rr %ECX<kill>
-  // The early clobber operand ties ECX input to the ECX def.
-  //
-  // The live interval of ECX is represented as this:
-  // %reg20,inf = [46,47:1)[174,230:0)  0@174-(230) 1@46-(47)
-  // The coalescer has no idea there was a def in the middle of [174,230].
-  if (AValNo->hasRedefByEC())
-    return false;
-
-  // If AValNo is defined as a copy from IntB, we can potentially process this.
-  // Get the instruction that defines this value number.
-  if (!CP.isCoalescable(AValNo->getCopy()))
-    return false;
-
-  // Get the LiveRange in IntB that this value number starts with.
-  LiveInterval::iterator ValLR =
-    IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
-  if (ValLR == IntB.end())
-    return false;
-
-  // Make sure that the end of the live range is inside the same block as
-  // CopyMI.
-  MachineInstr *ValLREndInst =
-    li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
-  if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
-    return false;
-
-  // Okay, we now know that ValLR ends in the same block that the CopyMI
-  // live-range starts.  If there are no intervening live ranges between them in
-  // IntB, we can merge them.
-  if (ValLR+1 != BLR) return false;
-
-  // If a live interval is a physical register, conservatively check if any
-  // of its aliases is overlapping the live interval of the virtual register.
-  // If so, do not coalesce.
-  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
-    for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
-      if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
-        DEBUG({
-            dbgs() << "\t\tInterfere with alias ";
-            li_->getInterval(*AS).print(dbgs(), tri_);
-          });
-        return false;
-      }
-  }
-
-  DEBUG({
-      dbgs() << "Extending: ";
-      IntB.print(dbgs(), tri_);
-    });
-
-  SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
-  // We are about to delete CopyMI, so need to remove it as the 'instruction
-  // that defines this value #'. Update the valnum with the new defining
-  // instruction #.
-  BValNo->def  = FillerStart;
-  BValNo->setCopy(0);
-
-  // Okay, we can merge them.  We need to insert a new liverange:
-  // [ValLR.end, BLR.begin) of either value number, then we merge the
-  // two value numbers.
-  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
-
-  // If the IntB live range is assigned to a physical register, and if that
-  // physreg has sub-registers, update their live intervals as well.
-  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
-    for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
-      if (!li_->hasInterval(*SR))
-        continue;
-      LiveInterval &SRLI = li_->getInterval(*SR);
-      SRLI.addRange(LiveRange(FillerStart, FillerEnd,
-                              SRLI.getNextValue(FillerStart, 0,
-                                                li_->getVNInfoAllocator())));
-    }
-  }
-
-  // Okay, merge "B1" into the same value number as "B0".
-  if (BValNo != ValLR->valno) {
-    // If B1 is killed by a PHI, then the merged live range must also be killed
-    // by the same PHI, as B0 and B1 can not overlap.
-    bool HasPHIKill = BValNo->hasPHIKill();
-    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
-    if (HasPHIKill)
-      ValLR->valno->setHasPHIKill(true);
-  }
-  DEBUG({
-      dbgs() << "   result = ";
-      IntB.print(dbgs(), tri_);
-      dbgs() << "\n";
-    });
-
-  // If the source instruction was killing the source register before the
-  // merge, unset the isKill marker given the live range has been extended.
-  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
-  if (UIdx != -1) {
-    ValLREndInst->getOperand(UIdx).setIsKill(false);
-  }
-
-  // If the copy instruction was killing the destination register before the
-  // merge, find the last use and trim the live range. That will also add the
-  // isKill marker.
-  if (ALR->end == CopyIdx)
-    li_->shrinkToUses(&IntA);
-
-  ++numExtends;
-  return true;
-}
-
-/// HasOtherReachingDefs - Return true if there are definitions of IntB
-/// other than BValNo val# that can reach uses of AValno val# of IntA.
-bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
-                                                    LiveInterval &IntB,
-                                                    VNInfo *AValNo,
-                                                    VNInfo *BValNo) {
-  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
-       AI != AE; ++AI) {
-    if (AI->valno != AValNo) continue;
-    LiveInterval::Ranges::iterator BI =
-      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
-    if (BI != IntB.ranges.begin())
-      --BI;
-    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
-      if (BI->valno == BValNo)
-        continue;
-      if (BI->start <= AI->start && BI->end > AI->start)
-        return true;
-      if (BI->start > AI->start && BI->start < AI->end)
-        return true;
-    }
-  }
-  return false;
-}
-
-/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
-/// IntA being the source and IntB being the dest, thus this defines a value
-/// number in IntB.  If the source value number (in IntA) is defined by a
-/// commutable instruction and its other operand is coalesced to the copy dest
-/// register, see if we can transform the copy into a noop by commuting the
-/// definition. For example,
-///
-///  A3 = op A2 B0<kill>
-///    ...
-///  B1 = A3      <- this copy
-///    ...
-///     = op A3   <- more uses
-///
-/// ==>
-///
-///  B2 = op B0 A2<kill>
-///    ...
-///  B1 = B2      <- now an identify copy
-///    ...
-///     = op B2   <- more uses
-///
-/// This returns true if an interval was modified.
-///
-bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
-                                                        MachineInstr *CopyMI) {
-  // FIXME: For now, only eliminate the copy by commuting its def when the
-  // source register is a virtual register. We want to guard against cases
-  // where the copy is a back edge copy and commuting the def lengthen the
-  // live interval of the source register to the entire loop.
-  if (CP.isPhys() && CP.isFlipped())
-    return false;
-
-  // Bail if there is no dst interval.
-  if (!li_->hasInterval(CP.getDstReg()))
-    return false;
-
-  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
-
-  LiveInterval &IntA =
-    li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
-  LiveInterval &IntB =
-    li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
-
-  // BValNo is a value number in B that is defined by a copy from A. 'B3' in
-  // the example above.
-  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
-  if (!BValNo || !BValNo->isDefByCopy())
-    return false;
-
-  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
-  // AValNo is the value number in A that defines the copy, A3 in the example.
-  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
-  assert(AValNo && "COPY source not live");
-
-  // If other defs can reach uses of this def, then it's not safe to perform
-  // the optimization.
-  if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
-    return false;
-  MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
-  if (!DefMI)
-    return false;
-  const TargetInstrDesc &TID = DefMI->getDesc();
-  if (!TID.isCommutable())
-    return false;
-  // If DefMI is a two-address instruction then commuting it will change the
-  // destination register.
-  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
-  assert(DefIdx != -1);
-  unsigned UseOpIdx;
-  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
-    return false;
-  unsigned Op1, Op2, NewDstIdx;
-  if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
-    return false;
-  if (Op1 == UseOpIdx)
-    NewDstIdx = Op2;
-  else if (Op2 == UseOpIdx)
-    NewDstIdx = Op1;
-  else
-    return false;
-
-  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
-  unsigned NewReg = NewDstMO.getReg();
-  if (NewReg != IntB.reg || !NewDstMO.isKill())
-    return false;
-
-  // Make sure there are no other definitions of IntB that would reach the
-  // uses which the new definition can reach.
-  if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
-    return false;
-
-  // Abort if the aliases of IntB.reg have values that are not simply the
-  // clobbers from the superreg.
-  if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
-    for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
-      if (li_->hasInterval(*AS) &&
-          HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
-        return false;
-
-  // If some of the uses of IntA.reg is already coalesced away, return false.
-  // It's not possible to determine whether it's safe to perform the coalescing.
-  for (MachineRegisterInfo::use_nodbg_iterator UI = 
-         mri_->use_nodbg_begin(IntA.reg), 
-       UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
-    MachineInstr *UseMI = &*UI;
-    SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
-    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
-    if (ULR == IntA.end())
-      continue;
-    if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
-      return false;
-  }
-
-  DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
-               << *DefMI);
-
-  // At this point we have decided that it is legal to do this
-  // transformation.  Start by commuting the instruction.
-  MachineBasicBlock *MBB = DefMI->getParent();
-  MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
-  if (!NewMI)
-    return false;
-  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
-      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
-      !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
-    return false;
-  if (NewMI != DefMI) {
-    li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
-    MBB->insert(DefMI, NewMI);
-    MBB->erase(DefMI);
-  }
-  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
-  NewMI->getOperand(OpIdx).setIsKill();
-
-  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
-  // A = or A, B
-  // ...
-  // B = A
-  // ...
-  // C = A<kill>
-  // ...
-  //   = B
-
-  // Update uses of IntA of the specific Val# with IntB.
-  for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
-         UE = mri_->use_end(); UI != UE;) {
-    MachineOperand &UseMO = UI.getOperand();
-    MachineInstr *UseMI = &*UI;
-    ++UI;
-    if (JoinedCopies.count(UseMI))
-      continue;
-    if (UseMI->isDebugValue()) {
-      // FIXME These don't have an instruction index.  Not clear we have enough
-      // info to decide whether to do this replacement or not.  For now do it.
-      UseMO.setReg(NewReg);
-      continue;
-    }
-    SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
-    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
-    if (ULR == IntA.end() || ULR->valno != AValNo)
-      continue;
-    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
-      UseMO.substPhysReg(NewReg, *tri_);
-    else
-      UseMO.setReg(NewReg);
-    if (UseMI == CopyMI)
-      continue;
-    if (!UseMI->isCopy())
-      continue;
-    if (UseMI->getOperand(0).getReg() != IntB.reg ||
-        UseMI->getOperand(0).getSubReg())
-      continue;
-
-    // This copy will become a noop. If it's defining a new val#, merge it into
-    // BValNo.
-    SlotIndex DefIdx = UseIdx.getDefIndex();
-    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
-    if (!DVNI)
-      continue;
-    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
-    assert(DVNI->def == DefIdx);
-    BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
-    markAsJoined(UseMI);
-  }
-
-  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
-  // is updated.
-  VNInfo *ValNo = BValNo;
-  ValNo->def = AValNo->def;
-  ValNo->setCopy(0);
-  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
-       AI != AE; ++AI) {
-    if (AI->valno != AValNo) continue;
-    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
-  }
-  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
-
-  IntA.removeValNo(AValNo);
-  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
-  ++numCommutes;
-  return true;
-}
-
-/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
-/// computation, replace the copy by rematerialize the definition.
-bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
-                                                       bool preserveSrcInt,
-                                                       unsigned DstReg,
-                                                       unsigned DstSubIdx,
-                                                       MachineInstr *CopyMI) {
-  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
-  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
-  assert(SrcLR != SrcInt.end() && "Live range not found!");
-  VNInfo *ValNo = SrcLR->valno;
-  // If other defs can reach uses of this def, then it's not safe to perform
-  // the optimization.
-  if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
-    return false;
-  MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
-  if (!DefMI)
-    return false;
-  assert(DefMI && "Defining instruction disappeared");
-  const TargetInstrDesc &TID = DefMI->getDesc();
-  if (!TID.isAsCheapAsAMove())
-    return false;
-  if (!tii_->isTriviallyReMaterializable(DefMI, AA))
-    return false;
-  bool SawStore = false;
-  if (!DefMI->isSafeToMove(tii_, AA, SawStore))
-    return false;
-  if (TID.getNumDefs() != 1)
-    return false;
-  if (!DefMI->isImplicitDef()) {
-    // Make sure the copy destination register class fits the instruction
-    // definition register class. The mismatch can happen as a result of earlier
-    // extract_subreg, insert_subreg, subreg_to_reg coalescing.
-    const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_);
-    if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
-      if (mri_->getRegClass(DstReg) != RC)
-        return false;
-    } else if (!RC->contains(DstReg))
-      return false;
-  }
-
-  // If destination register has a sub-register index on it, make sure it
-  // matches the instruction register class.
-  if (DstSubIdx) {
-    const TargetInstrDesc &TID = DefMI->getDesc();
-    if (TID.getNumDefs() != 1)
-      return false;
-    const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
-    const TargetRegisterClass *DstSubRC =
-      DstRC->getSubRegisterRegClass(DstSubIdx);
-    const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_);
-    if (DefRC == DstRC)
-      DstSubIdx = 0;
-    else if (DefRC != DstSubRC)
-      return false;
-  }
-
-  RemoveCopyFlag(DstReg, CopyMI);
-
-  MachineBasicBlock *MBB = CopyMI->getParent();
-  MachineBasicBlock::iterator MII =
-    llvm::next(MachineBasicBlock::iterator(CopyMI));
-  tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
-  MachineInstr *NewMI = prior(MII);
-
-  // CopyMI may have implicit operands, transfer them over to the newly
-  // rematerialized instruction. And update implicit def interval valnos.
-  for (unsigned i = CopyMI->getDesc().getNumOperands(),
-         e = CopyMI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = CopyMI->getOperand(i);
-    if (MO.isReg() && MO.isImplicit())
-      NewMI->addOperand(MO);
-    if (MO.isDef())
-      RemoveCopyFlag(MO.getReg(), CopyMI);
-  }
-
-  NewMI->copyImplicitOps(CopyMI);
-  li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
-  CopyMI->eraseFromParent();
-  ReMatCopies.insert(CopyMI);
-  ReMatDefs.insert(DefMI);
-  DEBUG(dbgs() << "Remat: " << *NewMI);
-  ++NumReMats;
-
-  // The source interval can become smaller because we removed a use.
-  if (preserveSrcInt)
-    li_->shrinkToUses(&SrcInt);
-
-  return true;
-}
-
-/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
-/// update the subregister number if it is not zero. If DstReg is a
-/// physical register and the existing subregister number of the def / use
-/// being updated is not zero, make sure to set it to the correct physical
-/// subregister.
-void
-SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
-  bool DstIsPhys = CP.isPhys();
-  unsigned SrcReg = CP.getSrcReg();
-  unsigned DstReg = CP.getDstReg();
-  unsigned SubIdx = CP.getSubIdx();
-
-  // Update LiveDebugVariables.
-  ldv_->renameRegister(SrcReg, DstReg, SubIdx);
-
-  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
-       MachineInstr *UseMI = I.skipInstruction();) {
-    // A PhysReg copy that won't be coalesced can perhaps be rematerialized
-    // instead.
-    if (DstIsPhys) {
-      if (UseMI->isCopy() &&
-          !UseMI->getOperand(1).getSubReg() &&
-          !UseMI->getOperand(0).getSubReg() &&
-          UseMI->getOperand(1).getReg() == SrcReg &&
-          UseMI->getOperand(0).getReg() != SrcReg &&
-          UseMI->getOperand(0).getReg() != DstReg &&
-          !JoinedCopies.count(UseMI) &&
-          ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
-                                  UseMI->getOperand(0).getReg(), 0, UseMI))
-        continue;
-    }
-
-    SmallVector<unsigned,8> Ops;
-    bool Reads, Writes;
-    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
-    bool Kills = false, Deads = false;
-
-    // Replace SrcReg with DstReg in all UseMI operands.
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-      MachineOperand &MO = UseMI->getOperand(Ops[i]);
-      Kills |= MO.isKill();
-      Deads |= MO.isDead();
-
-      if (DstIsPhys)
-        MO.substPhysReg(DstReg, *tri_);
-      else
-        MO.substVirtReg(DstReg, SubIdx, *tri_);
-    }
-
-    // This instruction is a copy that will be removed.
-    if (JoinedCopies.count(UseMI))
-      continue;
-
-    if (SubIdx) {
-      // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
-      // read-modify-write of DstReg.
-      if (Deads)
-        UseMI->addRegisterDead(DstReg, tri_);
-      else if (!Reads && Writes)
-        UseMI->addRegisterDefined(DstReg, tri_);
-
-      // Kill flags apply to the whole physical register.
-      if (DstIsPhys && Kills)
-        UseMI->addRegisterKilled(DstReg, tri_);
-    }
-
-    DEBUG({
-        dbgs() << "\t\tupdated: ";
-        if (!UseMI->isDebugValue())
-          dbgs() << li_->getInstructionIndex(UseMI) << "\t";
-        dbgs() << *UseMI;
-      });
-  }
-}
-
-/// removeIntervalIfEmpty - Check if the live interval of a physical register
-/// is empty, if so remove it and also remove the empty intervals of its
-/// sub-registers. Return true if live interval is removed.
-static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
-                                  const TargetRegisterInfo *tri_) {
-  if (li.empty()) {
-    if (TargetRegisterInfo::isPhysicalRegister(li.reg))
-      for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
-        if (!li_->hasInterval(*SR))
-          continue;
-        LiveInterval &sli = li_->getInterval(*SR);
-        if (sli.empty())
-          li_->removeInterval(*SR);
-      }
-    li_->removeInterval(li.reg);
-    return true;
-  }
-  return false;
-}
-
-/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
-/// the val# it defines. If the live interval becomes empty, remove it as well.
-bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
-                                             MachineInstr *DefMI) {
-  SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
-  LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
-  if (DefIdx != MLR->valno->def)
-    return false;
-  li.removeValNo(MLR->valno);
-  return removeIntervalIfEmpty(li, li_, tri_);
-}
-
-void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
-                                              const MachineInstr *CopyMI) {
-  SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
-  if (li_->hasInterval(DstReg)) {
-    LiveInterval &LI = li_->getInterval(DstReg);
-    if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
-      if (LR->valno->def == DefIdx)
-        LR->valno->setCopy(0);
-  }
-  if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
-    return;
-  for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
-    if (!li_->hasInterval(*AS))
-      continue;
-    LiveInterval &LI = li_->getInterval(*AS);
-    if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
-      if (LR->valno->def == DefIdx)
-        LR->valno->setCopy(0);
-  }
-}
-
-/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
-/// We need to be careful about coalescing a source physical register with a
-/// virtual register. Once the coalescing is done, it cannot be broken and these
-/// are not spillable! If the destination interval uses are far away, think
-/// twice about coalescing them!
-bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) {
-  bool Allocatable = li_->isAllocatable(CP.getDstReg());
-  LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
-
-  /// Always join simple intervals that are defined by a single copy from a
-  /// reserved register. This doesn't increase register pressure, so it is
-  /// always beneficial.
-  if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
-    return true;
-
-  if (!EnablePhysicalJoin) {
-    DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
-    return false;
-  }
-
-  // Only coalesce to allocatable physreg, we don't want to risk modifying
-  // reserved registers.
-  if (!Allocatable) {
-    DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
-    return false;  // Not coalescable.
-  }
-
-  // Don't join with physregs that have a ridiculous number of live
-  // ranges. The data structure performance is really bad when that
-  // happens.
-  if (li_->hasInterval(CP.getDstReg()) &&
-      li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
-    ++numAborts;
-    DEBUG(dbgs()
-          << "\tPhysical register live interval too complicated, abort!\n");
-    return false;
-  }
-
-  // FIXME: Why are we skipping this test for partial copies?
-  //        CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
-  if (!CP.isPartial()) {
-    const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
-    unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
-    unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
-    if (Length > Threshold) {
-      ++numAborts;
-      DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
-      return false;
-    }
-  }
-  return true;
-}
-
-/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
-/// two virtual registers from different register classes.
-bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
-                                                unsigned DstReg,
-                                             const TargetRegisterClass *SrcRC,
-                                             const TargetRegisterClass *DstRC,
-                                             const TargetRegisterClass *NewRC) {
-  unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
-  // This heuristics is good enough in practice, but it's obviously not *right*.
-  // 4 is a magic number that works well enough for x86, ARM, etc. It filter
-  // out all but the most restrictive register classes.
-  if (NewRCCount > 4 ||
-      // Early exit if the function is fairly small, coalesce aggressively if
-      // that's the case. For really special register classes with 3 or
-      // fewer registers, be a bit more careful.
-      (li_->getFuncInstructionCount() / NewRCCount) < 8)
-    return true;
-  LiveInterval &SrcInt = li_->getInterval(SrcReg);
-  LiveInterval &DstInt = li_->getInterval(DstReg);
-  unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
-  unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
-
-  // Coalesce aggressively if the intervals are small compared to the number of
-  // registers in the new class. The number 4 is fairly arbitrary, chosen to be
-  // less aggressive than the 8 used for the whole function size.
-  const unsigned ThresSize = 4 * NewRCCount;
-  if (SrcSize <= ThresSize && DstSize <= ThresSize)
-    return true;
-
-  // Estimate *register use density*. If it doubles or more, abort.
-  unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
-                                   mri_->use_nodbg_end());
-  unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
-                                   mri_->use_nodbg_end());
-  unsigned NewUses = SrcUses + DstUses;
-  unsigned NewSize = SrcSize + DstSize;
-  if (SrcRC != NewRC && SrcSize > ThresSize) {
-    unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
-    if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
-      return false;
-  }
-  if (DstRC != NewRC && DstSize > ThresSize) {
-    unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
-    if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
-      return false;
-  }
-  return true;
-}
-
-
-/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
-/// which are the src/dst of the copy instruction CopyMI.  This returns true
-/// if the copy was successfully coalesced away. If it is not currently
-/// possible to coalesce this interval, but it may be possible if other
-/// things get coalesced, then it returns true by reference in 'Again'.
-bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
-  MachineInstr *CopyMI = TheCopy.MI;
-
-  Again = false;
-  if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
-    return false; // Already done.
-
-  DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
-
-  CoalescerPair CP(*tii_, *tri_);
-  if (!CP.setRegisters(CopyMI)) {
-    DEBUG(dbgs() << "\tNot coalescable.\n");
-    return false;
-  }
-
-  // If they are already joined we continue.
-  if (CP.getSrcReg() == CP.getDstReg()) {
-    markAsJoined(CopyMI);
-    DEBUG(dbgs() << "\tCopy already coalesced.\n");
-    return false;  // Not coalescable.
-  }
-
-  DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
-               << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
-               << "\n");
-
-  // Enforce policies.
-  if (CP.isPhys()) {
-    if (!shouldJoinPhys(CP)) {
-      // Before giving up coalescing, if definition of source is defined by
-      // trivial computation, try rematerializing it.
-      if (!CP.isFlipped() &&
-          ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
-                                  CP.getDstReg(), 0, CopyMI))
-        return true;
-      return false;
-    }
-  } else {
-    // Avoid constraining virtual register regclass too much.
-    if (CP.isCrossClass()) {
-      DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
-      if (DisableCrossClassJoin) {
-        DEBUG(dbgs() << "\tCross-class joins disabled.\n");
-        return false;
-      }
-      if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
-                                 mri_->getRegClass(CP.getSrcReg()),
-                                 mri_->getRegClass(CP.getDstReg()),
-                                 CP.getNewRC())) {
-        DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
-        Again = true;  // May be possible to coalesce later.
-        return false;
-      }
-    }
-
-    // When possible, let DstReg be the larger interval.
-    if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
-                           li_->getInterval(CP.getDstReg()).ranges.size())
-      CP.flip();
-  }
-
-  // Okay, attempt to join these two intervals.  On failure, this returns false.
-  // Otherwise, if one of the intervals being joined is a physreg, this method
-  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
-  // been modified, so we can use this information below to update aliases.
-  if (!JoinIntervals(CP)) {
-    // Coalescing failed.
-
-    // If definition of source is defined by trivial computation, try
-    // rematerializing it.
-    if (!CP.isFlipped() &&
-        ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
-                                CP.getDstReg(), 0, CopyMI))
-      return true;
-
-    // If we can eliminate the copy without merging the live ranges, do so now.
-    if (!CP.isPartial()) {
-      if (AdjustCopiesBackFrom(CP, CopyMI) ||
-          RemoveCopyByCommutingDef(CP, CopyMI)) {
-        markAsJoined(CopyMI);
-        DEBUG(dbgs() << "\tTrivial!\n");
-        return true;
-      }
-    }
-
-    // Otherwise, we are unable to join the intervals.
-    DEBUG(dbgs() << "\tInterference!\n");
-    Again = true;  // May be possible to coalesce later.
-    return false;
-  }
-
-  // Coalescing to a virtual register that is of a sub-register class of the
-  // other. Make sure the resulting register is set to the right register class.
-  if (CP.isCrossClass()) {
-    ++numCrossRCs;
-    mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
-  }
-
-  // Remember to delete the copy instruction.
-  markAsJoined(CopyMI);
-
-  UpdateRegDefsUses(CP);
-
-  // If we have extended the live range of a physical register, make sure we
-  // update live-in lists as well.
-  if (CP.isPhys()) {
-    SmallVector<MachineBasicBlock*, 16> BlockSeq;
-    // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
-    // ranges for this, and they are preserved.
-    LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
-    for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
-         I != E; ++I ) {
-      li_->findLiveInMBBs(I->start, I->end, BlockSeq);
-      for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
-        MachineBasicBlock &block = *BlockSeq[idx];
-        if (!block.isLiveIn(CP.getDstReg()))
-          block.addLiveIn(CP.getDstReg());
-      }
-      BlockSeq.clear();
-    }
-  }
-
-  // SrcReg is guarateed to be the register whose live interval that is
-  // being merged.
-  li_->removeInterval(CP.getSrcReg());
-
-  // Update regalloc hint.
-  tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
-
-  DEBUG({
-    LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
-    dbgs() << "\tJoined. Result = ";
-    DstInt.print(dbgs(), tri_);
-    dbgs() << "\n";
-  });
-
-  ++numJoins;
-  return true;
-}
-
-/// ComputeUltimateVN - Assuming we are going to join two live intervals,
-/// compute what the resultant value numbers for each value in the input two
-/// ranges will be.  This is complicated by copies between the two which can
-/// and will commonly cause multiple value numbers to be merged into one.
-///
-/// VN is the value number that we're trying to resolve.  InstDefiningValue
-/// keeps track of the new InstDefiningValue assignment for the result
-/// LiveInterval.  ThisFromOther/OtherFromThis are sets that keep track of
-/// whether a value in this or other is a copy from the opposite set.
-/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
-/// already been assigned.
-///
-/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
-/// contains the value number the copy is from.
-///
-static unsigned ComputeUltimateVN(VNInfo *VNI,
-                                  SmallVector<VNInfo*, 16> &NewVNInfo,
-                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
-                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
-                                  SmallVector<int, 16> &ThisValNoAssignments,
-                                  SmallVector<int, 16> &OtherValNoAssignments) {
-  unsigned VN = VNI->id;
-
-  // If the VN has already been computed, just return it.
-  if (ThisValNoAssignments[VN] >= 0)
-    return ThisValNoAssignments[VN];
-  assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
-
-  // If this val is not a copy from the other val, then it must be a new value
-  // number in the destination.
-  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
-  if (I == ThisFromOther.end()) {
-    NewVNInfo.push_back(VNI);
-    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
-  }
-  VNInfo *OtherValNo = I->second;
-
-  // Otherwise, this *is* a copy from the RHS.  If the other side has already
-  // been computed, return it.
-  if (OtherValNoAssignments[OtherValNo->id] >= 0)
-    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
-  // Mark this value number as currently being computed, then ask what the
-  // ultimate value # of the other value is.
-  ThisValNoAssignments[VN] = -2;
-  unsigned UltimateVN =
-    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
-                      OtherValNoAssignments, ThisValNoAssignments);
-  return ThisValNoAssignments[VN] = UltimateVN;
-}
-
-/// JoinIntervals - Attempt to join these two intervals.  On failure, this
-/// returns false.
-bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
-  LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
-  DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
-
-  // If a live interval is a physical register, check for interference with any
-  // aliases. The interference check implemented here is a bit more conservative
-  // than the full interfeence check below. We allow overlapping live ranges
-  // only when one is a copy of the other.
-  if (CP.isPhys()) {
-    for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
-      if (!li_->hasInterval(*AS))
-        continue;
-      const LiveInterval &LHS = li_->getInterval(*AS);
-      LiveInterval::const_iterator LI = LHS.begin();
-      for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
-           RI != RE; ++RI) {
-        LI = std::lower_bound(LI, LHS.end(), RI->start);
-        // Does LHS have an overlapping live range starting before RI?
-        if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
-            (RI->start != RI->valno->def ||
-             !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
-          DEBUG({
-            dbgs() << "\t\tInterference from alias: ";
-            LHS.print(dbgs(), tri_);
-            dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
-          });
-          return false;
-        }
-
-        // Check that LHS ranges beginning in this range are copies.
-        for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
-          if (LI->start != LI->valno->def ||
-              !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
-            DEBUG({
-              dbgs() << "\t\tInterference from alias: ";
-              LHS.print(dbgs(), tri_);
-              dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
-            });
-            return false;
-          }
-        }
-      }
-    }
-  }
-
-  // Compute the final value assignment, assuming that the live ranges can be
-  // coalesced.
-  SmallVector<int, 16> LHSValNoAssignments;
-  SmallVector<int, 16> RHSValNoAssignments;
-  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
-  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
-  SmallVector<VNInfo*, 16> NewVNInfo;
-
-  LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
-  DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
-
-  // Loop over the value numbers of the LHS, seeing if any are defined from
-  // the RHS.
-  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    if (VNI->isUnused() || !VNI->isDefByCopy())  // Src not defined by a copy?
-      continue;
-
-    // Never join with a register that has EarlyClobber redefs.
-    if (VNI->hasRedefByEC())
-      return false;
-
-    // DstReg is known to be a register in the LHS interval.  If the src is
-    // from the RHS interval, we can use its value #.
-    if (!CP.isCoalescable(VNI->getCopy()))
-      continue;
-
-    // Figure out the value # from the RHS.
-    LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
-    // The copy could be to an aliased physreg.
-    if (!lr) continue;
-    LHSValsDefinedFromRHS[VNI] = lr->valno;
-  }
-
-  // Loop over the value numbers of the RHS, seeing if any are defined from
-  // the LHS.
-  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    if (VNI->isUnused() || !VNI->isDefByCopy())  // Src not defined by a copy?
-      continue;
-
-    // Never join with a register that has EarlyClobber redefs.
-    if (VNI->hasRedefByEC())
-      return false;
-
-    // DstReg is known to be a register in the RHS interval.  If the src is
-    // from the LHS interval, we can use its value #.
-    if (!CP.isCoalescable(VNI->getCopy()))
-      continue;
-
-    // Figure out the value # from the LHS.
-    LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
-    // The copy could be to an aliased physreg.
-    if (!lr) continue;
-    RHSValsDefinedFromLHS[VNI] = lr->valno;
-  }
-
-  LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
-  RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
-  NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
-
-  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    unsigned VN = VNI->id;
-    if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
-      continue;
-    ComputeUltimateVN(VNI, NewVNInfo,
-                      LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
-                      LHSValNoAssignments, RHSValNoAssignments);
-  }
-  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
-       i != e; ++i) {
-    VNInfo *VNI = *i;
-    unsigned VN = VNI->id;
-    if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
-      continue;
-    // If this value number isn't a copy from the LHS, it's a new number.
-    if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
-      NewVNInfo.push_back(VNI);
-      RHSValNoAssignments[VN] = NewVNInfo.size()-1;
-      continue;
-    }
-
-    ComputeUltimateVN(VNI, NewVNInfo,
-                      RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
-                      RHSValNoAssignments, LHSValNoAssignments);
-  }
-
-  // Armed with the mappings of LHS/RHS values to ultimate values, walk the
-  // interval lists to see if these intervals are coalescable.
-  LiveInterval::const_iterator I = LHS.begin();
-  LiveInterval::const_iterator IE = LHS.end();
-  LiveInterval::const_iterator J = RHS.begin();
-  LiveInterval::const_iterator JE = RHS.end();
-
-  // Skip ahead until the first place of potential sharing.
-  if (I != IE && J != JE) {
-    if (I->start < J->start) {
-      I = std::upper_bound(I, IE, J->start);
-      if (I != LHS.begin()) --I;
-    } else if (J->start < I->start) {
-      J = std::upper_bound(J, JE, I->start);
-      if (J != RHS.begin()) --J;
-    }
-  }
-
-  while (I != IE && J != JE) {
-    // Determine if these two live ranges overlap.
-    bool Overlaps;
-    if (I->start < J->start) {
-      Overlaps = I->end > J->start;
-    } else {
-      Overlaps = J->end > I->start;
-    }
-
-    // If so, check value # info to determine if they are really different.
-    if (Overlaps) {
-      // If the live range overlap will map to the same value number in the
-      // result liverange, we can still coalesce them.  If not, we can't.
-      if (LHSValNoAssignments[I->valno->id] !=
-          RHSValNoAssignments[J->valno->id])
-        return false;
-      // If it's re-defined by an early clobber somewhere in the live range,
-      // then conservatively abort coalescing.
-      if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
-        return false;
-    }
-
-    if (I->end < J->end)
-      ++I;
-    else
-      ++J;
-  }
-
-  // Update kill info. Some live ranges are extended due to copy coalescing.
-  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
-         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
-    VNInfo *VNI = I->first;
-    unsigned LHSValID = LHSValNoAssignments[VNI->id];
-    if (VNI->hasPHIKill())
-      NewVNInfo[LHSValID]->setHasPHIKill(true);
-  }
-
-  // Update kill info. Some live ranges are extended due to copy coalescing.
-  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
-         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
-    VNInfo *VNI = I->first;
-    unsigned RHSValID = RHSValNoAssignments[VNI->id];
-    if (VNI->hasPHIKill())
-      NewVNInfo[RHSValID]->setHasPHIKill(true);
-  }
-
-  if (LHSValNoAssignments.empty())
-    LHSValNoAssignments.push_back(-1);
-  if (RHSValNoAssignments.empty())
-    RHSValNoAssignments.push_back(-1);
-
-  // If we get here, we know that we can coalesce the live ranges.  Ask the
-  // intervals to coalesce themselves now.
-  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
-           mri_);
-  return true;
-}
-
-namespace {
-  // DepthMBBCompare - Comparison predicate that sort first based on the loop
-  // depth of the basic block (the unsigned), and then on the MBB number.
-  struct DepthMBBCompare {
-    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
-    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
-      // Deeper loops first
-      if (LHS.first != RHS.first)
-        return LHS.first > RHS.first;
-
-      // Prefer blocks that are more connected in the CFG. This takes care of
-      // the most difficult copies first while intervals are short.
-      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
-      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
-      if (cl != cr)
-        return cl > cr;
-
-      // As a last resort, sort by block number.
-      return LHS.second->getNumber() < RHS.second->getNumber();
-    }
-  };
-}
-
-void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
-                                               std::vector<CopyRec> &TryAgain) {
-  DEBUG(dbgs() << MBB->getName() << ":\n");
-
-  SmallVector<CopyRec, 8> VirtCopies;
-  SmallVector<CopyRec, 8> PhysCopies;
-  SmallVector<CopyRec, 8> ImpDefCopies;
-  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
-       MII != E;) {
-    MachineInstr *Inst = MII++;
-
-    // If this isn't a copy nor a extract_subreg, we can't join intervals.
-    unsigned SrcReg, DstReg;
-    if (Inst->isCopy()) {
-      DstReg = Inst->getOperand(0).getReg();
-      SrcReg = Inst->getOperand(1).getReg();
-    } else if (Inst->isSubregToReg()) {
-      DstReg = Inst->getOperand(0).getReg();
-      SrcReg = Inst->getOperand(2).getReg();
-    } else
-      continue;
-
-    bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
-    bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
-    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
-      ImpDefCopies.push_back(CopyRec(Inst, 0));
-    else if (SrcIsPhys || DstIsPhys)
-      PhysCopies.push_back(CopyRec(Inst, 0));
-    else
-      VirtCopies.push_back(CopyRec(Inst, 0));
-  }
-
-  // Try coalescing implicit copies and insert_subreg <undef> first,
-  // followed by copies to / from physical registers, then finally copies
-  // from virtual registers to virtual registers.
-  for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
-    CopyRec &TheCopy = ImpDefCopies[i];
-    bool Again = false;
-    if (!JoinCopy(TheCopy, Again))
-      if (Again)
-        TryAgain.push_back(TheCopy);
-  }
-  for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
-    CopyRec &TheCopy = PhysCopies[i];
-    bool Again = false;
-    if (!JoinCopy(TheCopy, Again))
-      if (Again)
-        TryAgain.push_back(TheCopy);
-  }
-  for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
-    CopyRec &TheCopy = VirtCopies[i];
-    bool Again = false;
-    if (!JoinCopy(TheCopy, Again))
-      if (Again)
-        TryAgain.push_back(TheCopy);
-  }
-}
-
-void SimpleRegisterCoalescing::joinIntervals() {
-  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
-
-  std::vector<CopyRec> TryAgainList;
-  if (loopInfo->empty()) {
-    // If there are no loops in the function, join intervals in function order.
-    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
-         I != E; ++I)
-      CopyCoalesceInMBB(I, TryAgainList);
-  } else {
-    // Otherwise, join intervals in inner loops before other intervals.
-    // Unfortunately we can't just iterate over loop hierarchy here because
-    // there may be more MBB's than BB's.  Collect MBB's for sorting.
-
-    // Join intervals in the function prolog first. We want to join physical
-    // registers with virtual registers before the intervals got too long.
-    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
-    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
-      MachineBasicBlock *MBB = I;
-      MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
-    }
-
-    // Sort by loop depth.
-    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
-
-    // Finally, join intervals in loop nest order.
-    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
-      CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
-  }
-
-  // Joining intervals can allow other intervals to be joined.  Iteratively join
-  // until we make no progress.
-  bool ProgressMade = true;
-  while (ProgressMade) {
-    ProgressMade = false;
-
-    for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
-      CopyRec &TheCopy = TryAgainList[i];
-      if (!TheCopy.MI)
-        continue;
-
-      bool Again = false;
-      bool Success = JoinCopy(TheCopy, Again);
-      if (Success || !Again) {
-        TheCopy.MI = 0;   // Mark this one as done.
-        ProgressMade = true;
-      }
-    }
-  }
-}
-
-void SimpleRegisterCoalescing::releaseMemory() {
-  JoinedCopies.clear();
-  ReMatCopies.clear();
-  ReMatDefs.clear();
-}
-
-bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
-  mf_ = &fn;
-  mri_ = &fn.getRegInfo();
-  tm_ = &fn.getTarget();
-  tri_ = tm_->getRegisterInfo();
-  tii_ = tm_->getInstrInfo();
-  li_ = &getAnalysis<LiveIntervals>();
-  ldv_ = &getAnalysis<LiveDebugVariables>();
-  AA = &getAnalysis<AliasAnalysis>();
-  loopInfo = &getAnalysis<MachineLoopInfo>();
-
-  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
-               << "********** Function: "
-               << ((Value*)mf_->getFunction())->getName() << '\n');
-
-  if (VerifyCoalescing)
-    mf_->verify(this, "Before register coalescing");
-
-  RegClassInfo.runOnMachineFunction(fn);
-
-  // Join (coalesce) intervals if requested.
-  if (EnableJoining) {
-    joinIntervals();
-    DEBUG({
-        dbgs() << "********** INTERVALS POST JOINING **********\n";
-        for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
-             I != E; ++I){
-          I->second->print(dbgs(), tri_);
-          dbgs() << "\n";
-        }
-      });
-  }
-
-  // Perform a final pass over the instructions and compute spill weights
-  // and remove identity moves.
-  SmallVector<unsigned, 4> DeadDefs;
-  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
-       mbbi != mbbe; ++mbbi) {
-    MachineBasicBlock* mbb = mbbi;
-    for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
-         mii != mie; ) {
-      MachineInstr *MI = mii;
-      if (JoinedCopies.count(MI)) {
-        // Delete all coalesced copies.
-        bool DoDelete = true;
-        assert(MI->isCopyLike() && "Unrecognized copy instruction");
-        unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
-        if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-            MI->getNumOperands() > 2)
-          // Do not delete extract_subreg, insert_subreg of physical
-          // registers unless the definition is dead. e.g.
-          // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
-          // or else the scavenger may complain. LowerSubregs will
-          // delete them later.
-          DoDelete = false;
-
-        if (MI->allDefsAreDead()) {
-          if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
-              li_->hasInterval(SrcReg))
-            li_->shrinkToUses(&li_->getInterval(SrcReg));
-          DoDelete = true;
-        }
-        if (!DoDelete) {
-          // We need the instruction to adjust liveness, so make it a KILL.
-          if (MI->isSubregToReg()) {
-            MI->RemoveOperand(3);
-            MI->RemoveOperand(1);
-          }
-          MI->setDesc(tii_->get(TargetOpcode::KILL));
-          mii = llvm::next(mii);
-        } else {
-          li_->RemoveMachineInstrFromMaps(MI);
-          mii = mbbi->erase(mii);
-          ++numPeep;
-        }
-        continue;
-      }
-
-      // Now check if this is a remat'ed def instruction which is now dead.
-      if (ReMatDefs.count(MI)) {
-        bool isDead = true;
-        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-          const MachineOperand &MO = MI->getOperand(i);
-          if (!MO.isReg())
-            continue;
-          unsigned Reg = MO.getReg();
-          if (!Reg)
-            continue;
-          if (TargetRegisterInfo::isVirtualRegister(Reg))
-            DeadDefs.push_back(Reg);
-          if (MO.isDead())
-            continue;
-          if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
-              !mri_->use_nodbg_empty(Reg)) {
-            isDead = false;
-            break;
-          }
-        }
-        if (isDead) {
-          while (!DeadDefs.empty()) {
-            unsigned DeadDef = DeadDefs.back();
-            DeadDefs.pop_back();
-            RemoveDeadDef(li_->getInterval(DeadDef), MI);
-          }
-          li_->RemoveMachineInstrFromMaps(mii);
-          mii = mbbi->erase(mii);
-          continue;
-        } else
-          DeadDefs.clear();
-      }
-
-      ++mii;
-
-      // Check for now unnecessary kill flags.
-      if (li_->isNotInMIMap(MI)) continue;
-      SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = MI->getOperand(i);
-        if (!MO.isReg() || !MO.isKill()) continue;
-        unsigned reg = MO.getReg();
-        if (!reg || !li_->hasInterval(reg)) continue;
-        if (!li_->getInterval(reg).killedAt(DefIdx)) {
-          MO.setIsKill(false);
-          continue;
-        }
-        // When leaving a kill flag on a physreg, check if any subregs should
-        // remain alive.
-        if (!TargetRegisterInfo::isPhysicalRegister(reg))
-          continue;
-        for (const unsigned *SR = tri_->getSubRegisters(reg);
-             unsigned S = *SR; ++SR)
-          if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
-            MI->addRegisterDefined(S, tri_);
-      }
-    }
-  }
-
-  DEBUG(dump());
-  DEBUG(ldv_->dump());
-  if (VerifyCoalescing)
-    mf_->verify(this, "After register coalescing");
-  return true;
-}
-
-/// print - Implement the dump method.
-void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const {
-   li_->print(O, m);
-}
-
-RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
-  return new SimpleRegisterCoalescing();
-}
-
-// Make sure that anything that uses RegisterCoalescer pulls in this file...
-DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 92970e496c25..65a33da93afe 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -87,12 +87,10 @@ FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
 bool SjLjEHPass::doInitialization(Module &M) {
   // Build the function context structure.
   // builtin_setjmp uses a five word jbuf
-  const Type *VoidPtrTy =
-          Type::getInt8PtrTy(M.getContext());
-  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+  Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+  Type *Int32Ty = Type::getInt32Ty(M.getContext());
   FunctionContextTy =
-    StructType::get(M.getContext(),
-                    VoidPtrTy,                        // __prev
+    StructType::get(VoidPtrTy,                        // __prev
                     Int32Ty,                          // call_site
                     ArrayType::get(Int32Ty, 4),       // __data
                     VoidPtrTy,                        // __personality
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index bf27cc86574f..761cab7ce850 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -76,12 +76,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
       return LSP.first;
     // There may not be a call instruction (?) in which case we ignore LPad.
     LSP.second = LSP.first;
-    for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin();
-         I != E; --I)
+    for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+         I != E;) {
+      --I;
       if (I->getDesc().isCall()) {
         LSP.second = LIS.getInstructionIndex(I);
         break;
       }
+    }
   }
 
   // If CurLI is live into a landing pad successor, move the last split point
@@ -122,7 +124,7 @@ void SplitAnalysis::analyzeUses() {
   // Compute per-live block info.
   if (!calcLiveBlockInfo()) {
     // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
-    // I am looking at you, SimpleRegisterCoalescing!
+    // I am looking at you, RegisterCoalescer!
     DidRepairRange = true;
     ++NumRepairs;
     DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
@@ -165,7 +167,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
     tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
     // If the block contains no uses, the range must be live through. At one
-    // point, SimpleRegisterCoalescing could create dangling ranges that ended
+    // point, RegisterCoalescer could create dangling ranges that ended
     // mid-block.
     if (UseI == UseE || *UseI >= Stop) {
       ++NumThroughBlocks;
@@ -634,6 +636,7 @@ unsigned SplitEditor::openIntv() {
 void SplitEditor::selectIntv(unsigned Idx) {
   assert(Idx != 0 && "Cannot select the complement interval");
   assert(Idx < Edit->size() && "Can only select previously opened interval");
+  DEBUG(dbgs() << "    selectIntv " << OpenIdx << " -> " << Idx << '\n');
   OpenIdx = Idx;
 }
 
@@ -654,6 +657,24 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
   return VNI->def;
 }
 
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+  assert(OpenIdx && "openIntv not called before enterIntvAfter");
+  DEBUG(dbgs() << "    enterIntvAfter " << Idx);
+  Idx = Idx.getBoundaryIndex();
+  VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+  if (!ParentVNI) {
+    DEBUG(dbgs() << ": not live\n");
+    return Idx;
+  }
+  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+  MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+  assert(MI && "enterIntvAfter called with invalid index");
+
+  VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+                              llvm::next(MachineBasicBlock::iterator(MI)));
+  return VNI->def;
+}
+
 SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
   assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
   SlotIndex End = LIS.getMBBEndIdx(&MBB);
@@ -1005,12 +1026,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
         markComplexMapped(i, ParentVNI);
   }
 
-#ifndef NDEBUG
-  // Every new interval must have a def by now, otherwise the split is bogus.
-  for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
-    assert((*I)->hasAtLeastOneValue() && "Split interval has no value");
-#endif
-
   // Transfer the simply mapped values, check if any are skipped.
   bool Skipped = transferValues();
   if (Skipped)
@@ -1109,3 +1124,263 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
   }
   finish();
 }
+
+
+//===----------------------------------------------------------------------===//
+//                    Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a method of global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+                                        unsigned IntvIn, SlotIndex LeaveBefore,
+                                        unsigned IntvOut, SlotIndex EnterAfter){
+  SlotIndex Start, Stop;
+  tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+  DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+               << ") intf " << LeaveBefore << '-' << EnterAfter
+               << ", live-through " << IntvIn << " -> " << IntvOut);
+
+  assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+  if (!IntvOut) {
+    DEBUG(dbgs() << ", spill on entry.\n");
+    //
+    //        <<<<<<<<<    Possible LeaveBefore interference.
+    //    |-----------|    Live through.
+    //    -____________    Spill on entry.
+    //
+    selectIntv(IntvIn);
+    MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+    SlotIndex Idx = leaveIntvAtTop(*MBB);
+    assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+    (void)Idx;
+    return;
+  }
+
+  if (!IntvIn) {
+    DEBUG(dbgs() << ", reload on exit.\n");
+    //
+    //    >>>>>>>          Possible EnterAfter interference.
+    //    |-----------|    Live through.
+    //    ___________--    Reload on exit.
+    //
+    selectIntv(IntvOut);
+    MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+    SlotIndex Idx = enterIntvAtEnd(*MBB);
+    assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+    (void)Idx;
+    return;
+  }
+
+  if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+    DEBUG(dbgs() << ", straight through.\n");
+    //
+    //    |-----------|    Live through.
+    //    -------------    Straight through, same intv, no interference.
+    //
+    selectIntv(IntvOut);
+    useIntv(Start, Stop);
+    return;
+  }
+
+  // We cannot legally insert splits after LSP.
+  SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+
+  if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+                  LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+    DEBUG(dbgs() << ", switch avoiding interference.\n");
+    //
+    //    >>>>     <<<<    Non-overlapping EnterAfter/LeaveBefore interference.
+    //    |-----------|    Live through.
+    //    ------=======    Switch intervals between interference.
+    //
+    SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP;
+    selectIntv(IntvOut);
+    SlotIndex Idx = enterIntvBefore(Cut);
+    useIntv(Idx, Stop);
+    selectIntv(IntvIn);
+    useIntv(Start, Idx);
+    assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+    assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+    return;
+  }
+
+  DEBUG(dbgs() << ", create local intv for interference.\n");
+  //
+  //    >>><><><><<<<    Overlapping EnterAfter/LeaveBefore interference.
+  //    |-----------|    Live through.
+  //    ==---------==    Switch intervals before/after interference.
+  //
+  assert(LeaveBefore <= EnterAfter && "Missed case");
+
+  selectIntv(IntvOut);
+  SlotIndex Idx = enterIntvAfter(EnterAfter);
+  useIntv(Idx, Stop);
+  assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+  selectIntv(IntvIn);
+  Idx = leaveIntvBefore(LeaveBefore);
+  useIntv(Start, Idx);
+  assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+                                  unsigned IntvIn, SlotIndex LeaveBefore) {
+  SlotIndex Start, Stop;
+  tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+  DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+               << "), uses " << BI.FirstUse << '-' << BI.LastUse
+               << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+               << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+  assert(IntvIn && "Must have register in");
+  assert(BI.LiveIn && "Must be live-in");
+  assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+  if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) {
+    DEBUG(dbgs() << " before interference.\n");
+    //
+    //               <<<    Interference after kill.
+    //     |---o---x   |    Killed in block.
+    //     =========        Use IntvIn everywhere.
+    //
+    selectIntv(IntvIn);
+    useIntv(Start, BI.LastUse);
+    return;
+  }
+
+  SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+  if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) {
+    //
+    //               <<<    Possible interference after last use.
+    //     |---o---o---|    Live-out on stack.
+    //     =========____    Leave IntvIn after last use.
+    //
+    //                 <    Interference after last use.
+    //     |---o---o--o|    Live-out on stack, late last use.
+    //     ============     Copy to stack before LSP, overlap IntvIn.
+    //            \_____    Stack interval is live-out.
+    //
+    if (BI.LastUse < LSP) {
+      DEBUG(dbgs() << ", spill after last use before interference.\n");
+      selectIntv(IntvIn);
+      SlotIndex Idx = leaveIntvAfter(BI.LastUse);
+      useIntv(Start, Idx);
+      assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+    } else {
+      DEBUG(dbgs() << ", spill before last split point.\n");
+      selectIntv(IntvIn);
+      SlotIndex Idx = leaveIntvBefore(LSP);
+      overlapIntv(Idx, BI.LastUse);
+      useIntv(Start, Idx);
+      assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+    }
+    return;
+  }
+
+  // The interference is overlapping somewhere we wanted to use IntvIn. That
+  // means we need to create a local interval that can be allocated a
+  // different register.
+  unsigned LocalIntv = openIntv();
+  (void)LocalIntv;
+  DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+  if (!BI.LiveOut || BI.LastUse < LSP) {
+    //
+    //           <<<<<<<    Interference overlapping uses.
+    //     |---o---o---|    Live-out on stack.
+    //     =====----____    Leave IntvIn before interference, then spill.
+    //
+    SlotIndex To = leaveIntvAfter(BI.LastUse);
+    SlotIndex From = enterIntvBefore(LeaveBefore);
+    useIntv(From, To);
+    selectIntv(IntvIn);
+    useIntv(Start, From);
+    assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+    return;
+  }
+
+  //           <<<<<<<    Interference overlapping uses.
+  //     |---o---o--o|    Live-out on stack, late last use.
+  //     =====-------     Copy to stack before LSP, overlap LocalIntv.
+  //            \_____    Stack interval is live-out.
+  //
+  SlotIndex To = leaveIntvBefore(LSP);
+  overlapIntv(To, BI.LastUse);
+  SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+  useIntv(From, To);
+  selectIntv(IntvIn);
+  useIntv(Start, From);
+  assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+                                   unsigned IntvOut, SlotIndex EnterAfter) {
+  SlotIndex Start, Stop;
+  tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+  DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+               << "), uses " << BI.FirstUse << '-' << BI.LastUse
+               << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+               << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+  SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+  assert(IntvOut && "Must have register out");
+  assert(BI.LiveOut && "Must be live-out");
+  assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+  if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) {
+    DEBUG(dbgs() << " after interference.\n");
+    //
+    //    >>>>             Interference before def.
+    //    |   o---o---|    Defined in block.
+    //        =========    Use IntvOut everywhere.
+    //
+    selectIntv(IntvOut);
+    useIntv(BI.FirstUse, Stop);
+    return;
+  }
+
+  if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) {
+    DEBUG(dbgs() << ", reload after interference.\n");
+    //
+    //    >>>>             Interference before first use.
+    //    |---o---o---|    Live-through, stack-in.
+    //    ____=========    Enter IntvOut before first use.
+    //
+    selectIntv(IntvOut);
+    SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse));
+    useIntv(Idx, Stop);
+    assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+    return;
+  }
+
+  // The interference is overlapping somewhere we wanted to use IntvOut. That
+  // means we need to create a local interval that can be allocated a
+  // different register.
+  DEBUG(dbgs() << ", interference overlaps uses.\n");
+  //
+  //    >>>>>>>          Interference overlapping uses.
+  //    |---o---o---|    Live-through, stack-in.
+  //    ____---======    Create local interval for interference range.
+  //
+  selectIntv(IntvOut);
+  SlotIndex Idx = enterIntvAfter(EnterAfter);
+  useIntv(Idx, Stop);
+  assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+  openIntv();
+  SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse));
+  useIntv(From, Idx);
+}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 7174c0b55f23..7948b725f856 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -81,6 +81,12 @@ public:
     bool LiveThrough;     ///< Live in whole block (Templ 5. above).
     bool LiveIn;          ///< Current reg is live in.
     bool LiveOut;         ///< Current reg is live out.
+
+    /// isOneInstr - Returns true when this BlockInfo describes a single
+    /// instruction.
+    bool isOneInstr() const {
+      return SlotIndex::isSameInstr(FirstUse, LastUse);
+    }
   };
 
 private:
@@ -360,6 +366,10 @@ public:
   /// Return the beginning of the new live range.
   SlotIndex enterIntvBefore(SlotIndex Idx);
 
+  /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAfter(SlotIndex Idx);
+
   /// enterIntvAtEnd - Enter the open interval at the end of MBB.
   /// Use the open interval from he inserted copy to the MBB end.
   /// Return the beginning of the new live range.
@@ -416,6 +426,42 @@ public:
   /// splitSingleBlocks - Split CurLI into a separate live interval inside each
   /// basic block in Blocks.
   void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+  /// splitLiveThroughBlock - Split CurLI in the given block such that it
+  /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+  /// the block, but they will be ignored when placing split points.
+  ///
+  /// @param MBBNum      Block number.
+  /// @param IntvIn      Interval index entering the block.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  /// @param IntvOut     Interval index leaving the block.
+  /// @param EnterAfter  When set, enter IntvOut after this point.
+  void splitLiveThroughBlock(unsigned MBBNum,
+                             unsigned IntvIn, SlotIndex LeaveBefore,
+                             unsigned IntvOut, SlotIndex EnterAfter);
+
+  /// splitRegInBlock - Split CurLI in the given block such that it enters the
+  /// block in IntvIn and leaves it on the stack (or not at all). Split points
+  /// are placed in a way that avoids putting uses in the stack interval. This
+  /// may require creating a local interval when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvIn      Interval index entering the block. Not 0.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+                       unsigned IntvIn, SlotIndex LeaveBefore);
+
+  /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+  /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+  /// Split points are placed to avoid interference and such that the uses are
+  /// not in the stack interval. This may require creating a local interval
+  /// when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvOut     Interval index leaving the block.
+  /// @param EnterAfter  When set, enter IntvOut after this point.
+  void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+                        unsigned IntvOut, SlotIndex EnterAfter);
 };
 
 }
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
index 08aee82b8c5c..ec75df4b7d1f 100644
--- a/lib/CodeGen/Splitter.cpp
+++ b/lib/CodeGen/Splitter.cpp
@@ -11,7 +11,7 @@
 
 #include "Splitter.h"
 
-#include "SimpleRegisterCoalescing.h"
+#include "RegisterCoalescer.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index f0a44abaf5cd..d3cbd15b64e8 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -186,7 +186,7 @@ bool StackProtector::InsertStackProtectors() {
       Value *Args[] = { LI, AI };
       CallInst::
         Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
-               &Args[0], array_endof(Args), "", InsPt);
+               Args, "", InsPt);
 
       // Create the basic block to jump to when the guard check fails.
       FailBB = CreateFailBB();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 01f5b5627f4f..57cbe1ba5960 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -504,7 +504,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
     bool FoundDef = false;  // Not counting 2address def.
 
     Uses.clear();
-    const TargetInstrDesc &TID = MII->getDesc();
+    const MCInstrDesc &MCID = MII->getDesc();
     for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MII->getOperand(i);
       if (!MO.isReg())
@@ -521,7 +521,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
         if (MO.getSubReg() || MII->isSubregToReg())
           return false;
 
-        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
         if (RC && !RC->contains(NewReg))
           return false;
 
@@ -566,7 +566,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
   SmallVector<MachineOperand*, 4> Uses;
   while (++MII != MBB->end()) {
     bool FoundKill = false;
-    const TargetInstrDesc &TID = MII->getDesc();
+    const MCInstrDesc &MCID = MII->getDesc();
     for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MII->getOperand(i);
       if (!MO.isReg())
@@ -583,7 +583,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
         if (MO.getSubReg())
           return false;
 
-        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
         if (RC && !RC->contains(NewReg))
           return false;
         if (MO.isKill())
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index e8eab8f5cf61..6b801cbf6e1e 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -95,10 +95,22 @@ namespace {
                               SmallSetVector<MachineBasicBlock*, 8> &Succs);
     bool TailDuplicateBlocks(MachineFunction &MF);
     bool shouldTailDuplicate(const MachineFunction &MF,
-                             MachineBasicBlock &TailBB);
-    bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+                             bool IsSimple, MachineBasicBlock &TailBB);
+    bool isSimpleBB(MachineBasicBlock *TailBB);
+    bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+    bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+                           SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                           const DenseSet<unsigned> &RegsUsedByPhi,
+                           SmallVector<MachineInstr*, 16> &Copies);
+    bool TailDuplicate(MachineBasicBlock *TailBB,
+                       bool IsSimple,
+                       MachineFunction &MF,
                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
                        SmallVector<MachineInstr*, 16> &Copies);
+    bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+                                bool IsSimple,
+                                MachineFunction &MF);
+
     void RemoveDeadBlock(MachineBasicBlock *MBB);
   };
 
@@ -169,6 +181,109 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
   }
 }
 
+/// TailDuplicateAndUpdate - Tail duplicate the block and clean up.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+                                          bool IsSimple,
+                                          MachineFunction &MF) {
+  // Save the successors list.
+  SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+                                              MBB->succ_end());
+
+  SmallVector<MachineBasicBlock*, 8> TDBBs;
+  SmallVector<MachineInstr*, 16> Copies;
+  if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+    return false;
+
+  ++NumTails;
+
+  SmallVector<MachineInstr*, 8> NewPHIs;
+  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+  // TailBB's immediate successors are now successors of those predecessors
+  // which duplicated TailBB. Add the predecessors as sources to the PHI
+  // instructions.
+  bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+  if (PreRegAlloc)
+    UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+  // If it is dead, remove it.
+  if (isDead) {
+    NumInstrDups -= MBB->size();
+    RemoveDeadBlock(MBB);
+    ++NumDeadBlocks;
+  }
+
+  // Update SSA form.
+  if (!SSAUpdateVRs.empty()) {
+    for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+      unsigned VReg = SSAUpdateVRs[i];
+      SSAUpdate.Initialize(VReg);
+
+      // If the original definition is still around, add it as an available
+      // value.
+      MachineInstr *DefMI = MRI->getVRegDef(VReg);
+      MachineBasicBlock *DefBB = 0;
+      if (DefMI) {
+        DefBB = DefMI->getParent();
+        SSAUpdate.AddAvailableValue(DefBB, VReg);
+      }
+
+      // Add the new vregs as available values.
+      DenseMap<unsigned, AvailableValsTy>::iterator LI =
+        SSAUpdateVals.find(VReg);
+      for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+        MachineBasicBlock *SrcBB = LI->second[j].first;
+        unsigned SrcReg = LI->second[j].second;
+        SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+      }
+
+      // Rewrite uses that are outside of the original def's block.
+      MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+      while (UI != MRI->use_end()) {
+        MachineOperand &UseMO = UI.getOperand();
+        MachineInstr *UseMI = &*UI;
+        ++UI;
+        if (UseMI->isDebugValue()) {
+          // SSAUpdate can replace the use with an undef. That creates
+          // a debug instruction that is a kill.
+          // FIXME: Should it be SSAUpdate's job to delete debug instructions
+          // instead of replacing the use with undef?
+          UseMI->eraseFromParent();
+          continue;
+        }
+        if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+          continue;
+        SSAUpdate.RewriteUse(UseMO);
+      }
+    }
+
+    SSAUpdateVRs.clear();
+    SSAUpdateVals.clear();
+  }
+
+  // Eliminate some of the copies inserted by tail duplication to maintain
+  // SSA form.
+  for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+    MachineInstr *Copy = Copies[i];
+    if (!Copy->isCopy())
+      continue;
+    unsigned Dst = Copy->getOperand(0).getReg();
+    unsigned Src = Copy->getOperand(1).getReg();
+    MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+    if (++UI == MRI->use_end()) {
+      // Copy is the only use. Do trivial copy propagation here.
+      MRI->replaceRegWith(Dst, Src);
+      Copy->eraseFromParent();
+    }
+  }
+
+  if (NewPHIs.size())
+    NumAddedPHIs += NewPHIs.size();
+
+  return true;
+}
+
 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
 /// branched to and do not fall through. Tail-duplicate their instructions
 /// into their predecessors to eliminate (dynamic) branches.
@@ -180,100 +295,22 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
     VerifyPHIs(MF, true);
   }
 
-  SmallVector<MachineInstr*, 8> NewPHIs;
-  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
-
   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
     MachineBasicBlock *MBB = I++;
 
     if (NumTails == TailDupLimit)
       break;
 
-    // Save the successors list.
-    SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
-                                                MBB->succ_end());
-
-    SmallVector<MachineBasicBlock*, 8> TDBBs;
-    SmallVector<MachineInstr*, 16> Copies;
-    if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
-      ++NumTails;
-
-      // TailBB's immediate successors are now successors of those predecessors
-      // which duplicated TailBB. Add the predecessors as sources to the PHI
-      // instructions.
-      bool isDead = MBB->pred_empty();
-      if (PreRegAlloc)
-        UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
-
-      // If it is dead, remove it.
-      if (isDead) {
-        NumInstrDups -= MBB->size();
-        RemoveDeadBlock(MBB);
-        ++NumDeadBlocks;
-      }
-
-      // Update SSA form.
-      if (!SSAUpdateVRs.empty()) {
-        for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
-          unsigned VReg = SSAUpdateVRs[i];
-          SSAUpdate.Initialize(VReg);
-
-          // If the original definition is still around, add it as an available
-          // value.
-          MachineInstr *DefMI = MRI->getVRegDef(VReg);
-          MachineBasicBlock *DefBB = 0;
-          if (DefMI) {
-            DefBB = DefMI->getParent();
-            SSAUpdate.AddAvailableValue(DefBB, VReg);
-          }
-
-          // Add the new vregs as available values.
-          DenseMap<unsigned, AvailableValsTy>::iterator LI =
-            SSAUpdateVals.find(VReg);  
-          for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
-            MachineBasicBlock *SrcBB = LI->second[j].first;
-            unsigned SrcReg = LI->second[j].second;
-            SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
-          }
-
-          // Rewrite uses that are outside of the original def's block.
-          MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
-          while (UI != MRI->use_end()) {
-            MachineOperand &UseMO = UI.getOperand();
-            MachineInstr *UseMI = &*UI;
-            ++UI;
-            if (UseMI->getParent() == DefBB && !UseMI->isPHI())
-              continue;
-            SSAUpdate.RewriteUse(UseMO);
-          }
-        }
+    bool IsSimple = isSimpleBB(MBB);
 
-        SSAUpdateVRs.clear();
-        SSAUpdateVals.clear();
-      }
-
-      // Eliminate some of the copies inserted by tail duplication to maintain
-      // SSA form.
-      for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
-        MachineInstr *Copy = Copies[i];
-        if (!Copy->isCopy())
-          continue;
-        unsigned Dst = Copy->getOperand(0).getReg();
-        unsigned Src = Copy->getOperand(1).getReg();
-        MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
-        if (++UI == MRI->use_end()) {
-          // Copy is the only use. Do trivial copy propagation here.
-          MRI->replaceRegWith(Dst, Src);
-          Copy->eraseFromParent();
-        }
-      }
+    if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+      continue;
 
-      if (PreRegAlloc && TailDupVerify)
-        VerifyPHIs(MF, false);
-      MadeChange = true;
-    }
+    MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
   }
-  NumAddedPHIs += NewPHIs.size();
+
+  if (PreRegAlloc && TailDupVerify)
+    VerifyPHIs(MF, false);
 
   return MadeChange;
 }
@@ -283,6 +320,8 @@ static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
          UE = MRI->use_end(); UI != UE; ++UI) {
     MachineInstr *UseMI = &*UI;
+    if (UseMI->isDebugValue())
+      continue;
     if (UseMI->getParent() != BB)
       return true;
   }
@@ -485,11 +524,16 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
 /// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
 bool
 TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+                                       bool IsSimple,
                                        MachineBasicBlock &TailBB) {
   // Only duplicate blocks that end with unconditional branches.
   if (TailBB.canFallThrough())
     return false;
 
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB.isSuccessor(&TailBB))
+    return false;
+
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
@@ -500,73 +544,208 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
   else
     MaxDuplicateCount = TailDuplicateSize;
 
-  if (PreRegAlloc) {
-    if (TailBB.empty())
-      return false;
-    const TargetInstrDesc &TID = TailBB.back().getDesc();
-    // Pre-regalloc tail duplication hurts compile time and doesn't help
-    // much except for indirect branches.
-    if (!TID.isIndirectBranch())
-      return false;
-    // If the target has hardware branch prediction that can handle indirect
-    // branches, duplicating them can often make them predictable when there
-    // are common paths through the code.  The limit needs to be high enough
-    // to allow undoing the effects of tail merging and other optimizations
-    // that rearrange the predecessors of the indirect branch.
-    MaxDuplicateCount = 20;
-  }
+  // If the target has hardware branch prediction that can handle indirect
+  // branches, duplicating them can often make them predictable when there
+  // are common paths through the code.  The limit needs to be high enough
+  // to allow undoing the effects of tail merging and other optimizations
+  // that rearrange the predecessors of the indirect branch.
 
-  // Don't try to tail-duplicate single-block loops.
-  if (TailBB.isSuccessor(&TailBB))
-    return false;
+  bool HasIndirectbr = false;
+  if (!TailBB.empty())
+    HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+
+  if (HasIndirectbr && PreRegAlloc)
+    MaxDuplicateCount = 20;
 
   // Check the instructions in the block to determine whether tail-duplication
   // is invalid or unlikely to be profitable.
   unsigned InstrCount = 0;
-  bool HasCall = false;
   for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
        ++I) {
     // Non-duplicable things shouldn't be tail-duplicated.
-    if (I->getDesc().isNotDuplicable()) return false;
+    if (I->getDesc().isNotDuplicable())
+      return false;
+
     // Do not duplicate 'return' instructions if this is a pre-regalloc run.
     // A return may expand into a lot more instructions (e.g. reload of callee
     // saved registers) after PEI.
-    if (PreRegAlloc && I->getDesc().isReturn()) return false;
-    // Don't duplicate more than the threshold.
-    if (InstrCount == MaxDuplicateCount) return false;
-    // Remember if we saw a call.
-    if (I->getDesc().isCall()) HasCall = true;
+    if (PreRegAlloc && I->getDesc().isReturn())
+      return false;
+
+    // Avoid duplicating calls before register allocation. Calls present a
+    // barrier to register allocation, so duplicating them may end up
+    // increasing spills.
+    if (PreRegAlloc && I->getDesc().isCall())
+      return false;
+
     if (!I->isPHI() && !I->isDebugValue())
       InstrCount += 1;
+
+    if (InstrCount > MaxDuplicateCount)
+      return false;
   }
-  // Don't tail-duplicate calls before register allocation. Calls presents a
-  // barrier to register allocation so duplicating them may end up increasing
-  // spills.
-  if (InstrCount > 1 && (PreRegAlloc && HasCall))
+
+  if (HasIndirectbr && PreRegAlloc)
+    return true;
+
+  if (IsSimple)
+    return true;
+
+  if (!PreRegAlloc)
+    return true;
+
+  return canCompletelyDuplicateBB(TailBB);
+}
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+  if (TailBB->succ_size() != 1)
+    return false;
+  if (TailBB->pred_empty())
     return false;
+  MachineBasicBlock::iterator I = TailBB->begin();
+  MachineBasicBlock::iterator E = TailBB->end();
+  while (I != E && I->isDebugValue())
+    ++I;
+  if (I == E)
+    return true;
+  return I->getDesc().isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+              SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+  for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+         SE = A.succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock *BB = *SI;
+    if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+      return true;
+  }
+
+  return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+  for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+       PE = BB.pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    if (PredBB->succ_size() > 1)
+      return false;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      return false;
 
+    if (!PredCond.empty())
+      return false;
+  }
   return true;
 }
 
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+                                     SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                     const DenseSet<unsigned> &UsedByPhi,
+                                     SmallVector<MachineInstr*, 16> &Copies) {
+  SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+                                           TailBB->succ_end());
+  SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+                                           TailBB->pred_end());
+  bool Changed = false;
+  for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    if (PredBB->getLandingPadSuccessor())
+      continue;
+
+    if (bothUsedInPHI(*PredBB, Succs))
+      continue;
+
+    MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      continue;
+
+    Changed = true;
+    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+                 << "From simple Succ: " << *TailBB);
+
+    MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+    MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+    // Make PredFBB explicit.
+    if (PredCond.empty())
+      PredFBB = PredTBB;
+
+    // Make fall through explicit.
+    if (!PredTBB)
+      PredTBB = NextBB;
+    if (!PredFBB)
+      PredFBB = NextBB;
+
+    // Redirect any branch that targeted TailBB to NewTarget instead.
+    if (PredFBB == TailBB)
+      PredFBB = NewTarget;
+    if (PredTBB == TailBB)
+      PredTBB = NewTarget;
+
+    // Make the branch unconditional if possible
+    if (PredTBB == PredFBB) {
+      PredCond.clear();
+      PredFBB = NULL;
+    }
+
+    // Avoid adding fall through branches.
+    if (PredFBB == NextBB)
+      PredFBB = NULL;
+    if (PredTBB == NextBB && PredFBB == NULL)
+      PredTBB = NULL;
+
+    TII->RemoveBranch(*PredBB);
+
+    if (PredTBB)
+      TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+    PredBB->removeSuccessor(TailBB);
+    unsigned NumSuccessors = PredBB->succ_size();
+    assert(NumSuccessors <= 1);
+    if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+      PredBB->addSuccessor(NewTarget);
+
+    TDBBs.push_back(PredBB);
+  }
+  return Changed;
+}
+
 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
 /// of its predecessors.
 bool
-TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+                                 bool IsSimple,
+                                 MachineFunction &MF,
                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                  SmallVector<MachineInstr*, 16> &Copies) {
-  if (!shouldTailDuplicate(MF, *TailBB))
-    return false;
-
   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
 
+  DenseSet<unsigned> UsedByPhi;
+  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+  if (IsSimple)
+    return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
   // Iterate through all the unique predecessors and tail-duplicate this
   // block into them, if possible. Copying the list ahead of time also
   // avoids trouble with the predecessor list reallocating.
   bool Changed = false;
   SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
                                               TailBB->pred_end());
-  DenseSet<unsigned> UsedByPhi;
-  getRegsUsedByPHIs(*TailBB, &UsedByPhi);
   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
        PE = Preds.end(); PI != PE; ++PI) {
     MachineBasicBlock *PredBB = *PI;
@@ -618,6 +797,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                                TII->get(TargetOpcode::COPY),
                                CopyInfos[i].first).addReg(CopyInfos[i].second));
     }
+
+    // Simplify: let AnalyzeBranch tidy PredBB's branches (AllowModify = true).
+    TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
     NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
 
     // Update the CFG.
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 34e2b33185b5..86e71d8ccbb6 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -59,8 +59,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 // the two operands returned by findCommutedOpIndices.
 MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
                                                       bool NewMI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  bool HasDef = TID.getNumDefs();
+  const MCInstrDesc &MCID = MI->getDesc();
+  bool HasDef = MCID.getNumDefs();
   if (HasDef && !MI->getOperand(0).isReg())
     // No idea how to commute this instruction. Target should implement its own.
     return 0;
@@ -81,7 +81,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
   bool ChangeReg0 = false;
   if (HasDef && MI->getOperand(0).getReg() == Reg1) {
     // Must be two address instruction!
-    assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+    assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
            "Expecting a two-address instruction!");
     Reg2IsKill = false;
     ChangeReg0 = true;
@@ -119,12 +119,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
 bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
                                                 unsigned &SrcOpIdx1,
                                                 unsigned &SrcOpIdx2) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isCommutable())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isCommutable())
     return false;
   // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
   // is not true, then the target must implement this.
-  SrcOpIdx1 = TID.getNumDefs();
+  SrcOpIdx1 = MCID.getNumDefs();
   SrcOpIdx2 = SrcOpIdx1 + 1;
   if (!MI->getOperand(SrcOpIdx1).isReg() ||
       !MI->getOperand(SrcOpIdx2).isReg())
@@ -137,12 +137,12 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
 bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
                             const SmallVectorImpl<MachineOperand> &Pred) const {
   bool MadeChange = false;
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isPredicable())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isPredicable())
     return false;
 
   for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    if (TID.OpInfo[i].isPredicate()) {
+    if (MCID.OpInfo[i].isPredicate()) {
       MachineOperand &MO = MI->getOperand(i);
       if (MO.isReg()) {
         MO.setReg(Pred[j].getReg());
@@ -332,10 +332,10 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
       MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
     return true;
 
-  const TargetInstrDesc &TID = MI->getDesc();
+  const MCInstrDesc &MCID = MI->getDesc();
 
   // Avoid instructions obviously unsafe for remat.
-  if (TID.isNotDuplicable() || TID.mayStore() ||
+  if (MCID.isNotDuplicable() || MCID.mayStore() ||
       MI->hasUnmodeledSideEffects())
     return false;
 
@@ -345,7 +345,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
     return false;
 
   // Avoid instructions which load from potentially varying memory.
-  if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+  if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
     return false;
 
   // If any of the registers accessed are non-constant, conservatively assume
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index cdac42d1bf17..a3c562013b59 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,6 +43,19 @@ using namespace dwarf;
 //                                  ELF
 //===----------------------------------------------------------------------===//
 
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+  : TargetLoweringObjectFile(),
+    TLSDataSection(0),
+    TLSBSSSection(0),
+    DataRelSection(0),
+    DataRelLocalSection(0),
+    DataRelROSection(0),
+    DataRelROLocalSection(0),
+    MergeableConst4Section(0),
+    MergeableConst8Section(0),
+    MergeableConst16Section(0) {
+}
+
 void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
                                              const TargetMachine &TM) {
   TargetLoweringObjectFile::Initialize(Ctx, TM);
@@ -189,8 +202,8 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
     return  Mang->getSymbol(GV);
     break;
   case dwarf::DW_EH_PE_pcrel: {
-    Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName();
-    return getContext().GetOrCreateSymbol(FullName);
+    return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+                                          Mang->getSymbol(GV)->getName());
     break;
   }
   }
@@ -199,13 +212,13 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
 void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
                                                        const TargetMachine &TM,
                                                        const MCSymbol *Sym) const {
-  Twine FullName = StringRef("DW.ref.") + Sym->getName();
-  MCSymbol *Label = getContext().GetOrCreateSymbol(FullName);
+  SmallString<64> NameData("DW.ref.");
+  NameData += Sym->getName();
+  MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
   Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
   Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
-  Twine SectionName = StringRef(".data.") + Label->getName();
-  SmallString<64> NameData;
-  SectionName.toVector(NameData);
+  StringRef Prefix = ".data.";
+  NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
   unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
   const MCSection *Sec = getContext().getELFSection(NameData,
                                                     ELF::SHT_PROGBITS,
@@ -480,6 +493,27 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
 //                                 MachO
 //===----------------------------------------------------------------------===//
 
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+  : TargetLoweringObjectFile(),
+    TLSDataSection(0),
+    TLSBSSSection(0),
+    TLSTLVSection(0),
+    TLSThreadInitSection(0),
+    CStringSection(0),
+    UStringSection(0),
+    TextCoalSection(0),
+    ConstTextCoalSection(0),
+    ConstDataSection(0),
+    DataCoalSection(0),
+    DataCommonSection(0),
+    DataBSSSection(0),
+    FourByteConstantSection(0),
+    EightByteConstantSection(0),
+    SixteenByteConstantSection(0),
+    LazySymbolPointerSection(0),
+    NonLazySymbolPointerSection(0) {
+}
+
 void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
                                                const TargetMachine &TM) {
   IsFunctionEHFrameSymbolPrivate = false;
@@ -605,6 +639,13 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   // Exception Handling.
   LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
                                              SectionKind::getReadOnlyWithRel());
+
+  if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+    CompactUnwindSection =
+      getContext().getMachOSection("__LD", "__compact_unwind",
+                                   MCSectionMachO::S_ATTR_DEBUG,
+                                   SectionKind::getReadOnly());
+
   // Debug Information.
   DwarfAbbrevSection =
     getContext().getMachOSection("__DWARF", "__debug_abbrev",
@@ -884,6 +925,13 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
 //                                  COFF
 //===----------------------------------------------------------------------===//
 
+TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF()
+  : TargetLoweringObjectFile(),
+    DrectveSection(0),
+    PDataSection(0),
+    XDataSection(0) {
+}
+
 void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
                                               const TargetMachine &TM) {
   TargetLoweringObjectFile::Initialize(Ctx, TM);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index f54d879759ff..6d6244e4f879 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -280,8 +280,8 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
 /// isTwoAddrUse - Return true if the specified MI is using the specified
 /// register as a two-address operand.
 static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
-  const TargetInstrDesc &TID = UseMI->getDesc();
-  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+  const MCInstrDesc &MCID = UseMI->getDesc();
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
     MachineOperand &MO = UseMI->getOperand(i);
     if (MO.isReg() && MO.getReg() == Reg &&
         (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
@@ -443,8 +443,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
 /// isTwoAddrUse - Return true if the specified MI uses the specified register
 /// as a two-address use. If so, return the destination register by reference.
 static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands();
+  const MCInstrDesc &MCID = MI.getDesc();
+  unsigned NumOps = MI.isInlineAsm()
+    ? MI.getNumOperands() : MCID.getNumOperands();
   for (unsigned i = 0; i != NumOps; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
@@ -761,10 +762,10 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
 static bool isSafeToDelete(MachineInstr *MI,
                            const TargetInstrInfo *TII,
                            SmallVector<unsigned, 4> &Kills) {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (TID.mayStore() || TID.isCall())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.mayStore() || MCID.isCall())
     return false;
-  if (TID.isTerminator() || MI->hasUnmodeledSideEffects())
+  if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
     return false;
 
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -854,7 +855,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineFunction::iterator &mbbi,
                         unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
                         SmallPtrSet<MachineInstr*, 8> &Processed) {
-  const TargetInstrDesc &TID = mi->getDesc();
+  const MCInstrDesc &MCID = mi->getDesc();
   unsigned regA = mi->getOperand(DstIdx).getReg();
   unsigned regB = mi->getOperand(SrcIdx).getReg();
 
@@ -876,7 +877,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   unsigned regCIdx = ~0U;
   bool TryCommute = false;
   bool AggressiveCommute = false;
-  if (TID.isCommutable() && mi->getNumOperands() >= 3 &&
+  if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
       TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
     if (SrcIdx == SrcOp1)
       regCIdx = SrcOp2;
@@ -907,7 +908,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   if (TargetRegisterInfo::isVirtualRegister(regA))
     ScanUses(regA, &*mbbi, Processed);
 
-  if (TID.isConvertibleTo3Addr()) {
+  if (MCID.isConvertibleTo3Addr()) {
     // This instruction is potentially convertible to a true
     // three-address instruction.  Check if it is profitable.
     if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -927,7 +928,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   //   movq (%rax), %rcx
   //   addq %rdx, %rcx
   // because it's preferable to schedule a load than a register copy.
-  if (TID.mayLoad() && !regBKilled) {
+  if (MCID.mayLoad() && !regBKilled) {
     // Determine if a load can be unfolded.
     unsigned LoadRegIndex;
     unsigned NewOpc =
@@ -936,14 +937,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
                                       /*UnfoldStore=*/false,
                                       &LoadRegIndex);
     if (NewOpc != 0) {
-      const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
-      if (UnfoldTID.getNumDefs() == 1) {
+      const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+      if (UnfoldMCID.getNumDefs() == 1) {
         MachineFunction &MF = *mbbi->getParent();
 
         // Unfold the load.
         DEBUG(dbgs() << "2addr:   UNFOLDING: " << *mi);
         const TargetRegisterClass *RC =
-          UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+          TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
         unsigned Reg = MRI->createVirtualRegister(RC);
         SmallVector<MachineInstr *, 2> NewMIs;
         if (!TII->unfoldMemoryOperand(MF, mi, Reg,
@@ -1067,7 +1068,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
       if (mi->isRegSequence())
         RegSequences.push_back(&*mi);
 
-      const TargetInstrDesc &TID = mi->getDesc();
+      const MCInstrDesc &MCID = mi->getDesc();
       bool FirstTied = true;
 
       DistanceMap.insert(std::make_pair(mi, ++Dist));
@@ -1077,7 +1078,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
       // First scan through all the tied register uses in this instruction
       // and record a list of pairs of tied operands for each register.
       unsigned NumOps = mi->isInlineAsm()
-        ? mi->getNumOperands() : TID.getNumOperands();
+        ? mi->getNumOperands() : MCID.getNumOperands();
       for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
         unsigned DstIdx = 0;
         if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
@@ -1095,12 +1096,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
                "two address instruction invalid");
 
         unsigned regB = mi->getOperand(SrcIdx).getReg();
-        TiedOperandMap::iterator OI = TiedOperands.find(regB);
-        if (OI == TiedOperands.end()) {
-          SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
-          OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
-        }
-        OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
+        TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
       }
 
       // Now iterate over the information collected above.
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index ba50f4e42302..03abff356934 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -208,6 +208,11 @@ namespace llvm {
     /// @brief returns the register allocation preference.
     unsigned getRegAllocPref(unsigned virtReg);
 
+    /// @brief returns true if VirtReg is assigned to its preferred physreg.
+    bool hasPreferredPhys(unsigned VirtReg) {
+      return getPhys(VirtReg) == getRegAllocPref(VirtReg);
+    }
+
     /// @brief records virtReg is a split live interval from SReg.
     void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
       Virt2SplitMap[virtReg] = SReg;
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 185065880581..a5ec797b27db 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -679,8 +679,8 @@ static void ReMaterialize(MachineBasicBlock &MBB,
                           VirtRegMap &VRM) {
   MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
 #ifndef NDEBUG
-  const TargetInstrDesc &TID = ReMatDefMI->getDesc();
-  assert(TID.getNumDefs() == 1 &&
+  const MCInstrDesc &MCID = ReMatDefMI->getDesc();
+  assert(MCID.getNumDefs() == 1 &&
          "Don't know how to remat instructions that define > 1 values!");
 #endif
   TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
@@ -1483,11 +1483,11 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
 /// where SrcReg is r1 and it is tied to r0. Return true if after
 /// commuting this instruction it will be r0 = op r2, r1.
 static bool CommuteChangesDestination(MachineInstr *DefMI,
-                                      const TargetInstrDesc &TID,
+                                      const MCInstrDesc &MCID,
                                       unsigned SrcReg,
                                       const TargetInstrInfo *TII,
                                       unsigned &DstIdx) {
-  if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+  if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
     return false;
   if (!DefMI->getOperand(1).isReg() ||
       DefMI->getOperand(1).getReg() != SrcReg)
@@ -1527,11 +1527,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
   MachineInstr &MI = *MII;
   MachineBasicBlock::iterator DefMII = prior(MII);
   MachineInstr *DefMI = DefMII;
-  const TargetInstrDesc &TID = DefMI->getDesc();
+  const MCInstrDesc &MCID = DefMI->getDesc();
   unsigned NewDstIdx;
   if (DefMII != MBB->begin() &&
-      TID.isCommutable() &&
-      CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+      MCID.isCommutable() &&
+      CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
     MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
     unsigned NewReg = NewDstMO.getReg();
     if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
@@ -1658,9 +1658,9 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
 /// isSafeToDelete - Return true if this instruction doesn't produce any side
 /// effect and all of its defs are dead.
 static bool isSafeToDelete(MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() ||
-      TID.isCall() || TID.isBarrier() || TID.isReturn() ||
+  const MCInstrDesc &MCID = MI.getDesc();
+  if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
+      MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
       MI.isLabel() || MI.isDebugValue() ||
       MI.hasUnmodeledSideEffects())
     return false;
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 062256a2ac73..f7e2a4df951e 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -78,7 +78,6 @@ static char getTypeID(const Type *Ty) {
   case Type::FunctionTyID:return 'M';
   case Type::StructTyID:  return 'T';
   case Type::ArrayTyID:   return 'A';
-  case Type::OpaqueTyID:  return 'O';
   default: return 'U';
   }
 }
@@ -282,10 +281,10 @@ GenericValue Interpreter::callExternalFunction(Function *F,
 
   if (F->getName() == "__main")
     errs() << "Tried to execute an unknown external function: "
-      << F->getType()->getDescription() << " __main\n";
+      << *F->getType() << " __main\n";
   else
     report_fatal_error("Tried to execute an unknown external function: " +
-                      F->getType()->getDescription() + " " +F->getName());
+                       F->getName());
 #ifndef USE_LIBFFI
   errs() << "Recompiling LLVM with --enable-libffi might help.\n";
 #endif
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 8fceaf2b4931..445d2d0670c8 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -533,8 +533,7 @@ GenericValue JIT::runFunction(Function *F,
     Args.push_back(C);
   }
 
-  CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(),
-                                       "", StubBB);
+  CallInst *TheCall = CallInst::Create(F, Args, "", StubBB);
   TheCall->setCallingConv(F->getCallingConv());
   TheCall->setTailCall();
   if (!TheCall->getType()->isVoidTy())
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index 9e53f8757ec0..59bdfee3db43 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_llvm_library(LLVMRuntimeDyld
   RuntimeDyld.cpp
+  RuntimeDyldMachO.cpp
   )
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index eda4cbbad52a..33dd70502798 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1,4 +1,4 @@
-//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,118 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dyld"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Object/MachOObject.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/raw_ostream.h"
+#include "RuntimeDyldImpl.h"
 using namespace llvm;
 using namespace llvm::object;
 
 // Empty out-of-line virtual destructor as the key function.
 RTDyldMemoryManager::~RTDyldMemoryManager() {}
+RuntimeDyldImpl::~RuntimeDyldImpl() {}
 
 namespace llvm {
-class RuntimeDyldImpl {
-  unsigned CPUType;
-  unsigned CPUSubtype;
-
-  // The MemoryManager to load objects into.
-  RTDyldMemoryManager *MemMgr;
-
-  // FIXME: This all assumes we're dealing with external symbols for anything
-  //        explicitly referenced. I.e., we can index by name and things
-  //        will work out. In practice, this may not be the case, so we
-  //        should find a way to effectively generalize.
-
-  // For each function, we have a MemoryBlock of it's instruction data.
-  StringMap<sys::MemoryBlock> Functions;
-
-  // Master symbol table. As modules are loaded and external symbols are
-  // resolved, their addresses are stored here.
-  StringMap<uint8_t*> SymbolTable;
-
-  // For each symbol, keep a list of relocations based on it. Anytime
-  // its address is reassigned (the JIT re-compiled the function, e.g.),
-  // the relocations get re-resolved.
-  struct RelocationEntry {
-    std::string Target;     // Object this relocation is contained in.
-    uint64_t    Offset;     // Offset into the object for the relocation.
-    uint32_t    Data;       // Second word of the raw macho relocation entry.
-    int64_t     Addend;     // Addend encoded in the instruction itself, if any.
-    bool        isResolved; // Has this relocation been resolved previously?
-
-    RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
-      : Target(t), Offset(offset), Data(data), Addend(addend),
-        isResolved(false) {}
-  };
-  typedef SmallVector<RelocationEntry, 4> RelocationList;
-  StringMap<RelocationList> Relocations;
-
-  // FIXME: Also keep a map of all the relocations contained in an object. Use
-  // this to dynamically answer whether all of the relocations in it have
-  // been resolved or not.
-
-  bool HasError;
-  std::string ErrorStr;
-
-  // Set the error state and record an error string.
-  bool Error(const Twine &Msg) {
-    ErrorStr = Msg.str();
-    HasError = true;
-    return true;
-  }
-
-  void extractFunction(StringRef Name, uint8_t *StartAddress,
-                       uint8_t *EndAddress);
-  bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
-                         unsigned Type, unsigned Size);
-  bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
-                               unsigned Type, unsigned Size);
-  bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
-                            unsigned Type, unsigned Size);
-
-  bool loadSegment32(const MachOObject *Obj,
-                     const MachOObject::LoadCommandInfo *SegmentLCI,
-                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-  bool loadSegment64(const MachOObject *Obj,
-                     const MachOObject::LoadCommandInfo *SegmentLCI,
-                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-
-public:
-  RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
-
-  bool loadObject(MemoryBuffer *InputBuffer);
-
-  void *getSymbolAddress(StringRef Name) {
-    // FIXME: Just look up as a function for now. Overly simple of course.
-    // Work in progress.
-    return SymbolTable.lookup(Name);
-  }
-
-  void resolveRelocations();
-
-  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
-
-  // Is the linker in an error state?
-  bool hasError() { return HasError; }
-
-  // Mark the error condition as handled and continue.
-  void clearError() { HasError = false; }
-
-  // Get the error message.
-  StringRef getErrorString() { return ErrorStr; }
-};
 
 void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
                                       uint8_t *EndAddress) {
@@ -144,472 +41,6 @@ void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
   DEBUG(dbgs() << "    allocated to [" << Mem << ", " << Mem + Size << "]\n");
 }
 
-bool RuntimeDyldImpl::
-resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
-                  unsigned Type, unsigned Size) {
-  // This just dispatches to the proper target specific routine.
-  switch (CPUType) {
-  default: assert(0 && "Unsupported CPU type!");
-  case mach::CTM_x86_64:
-    return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
-                                   isPCRel, Type, Size);
-  case mach::CTM_ARM:
-    return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
-                                isPCRel, Type, Size);
-  }
-  llvm_unreachable("");
-}
-
-bool RuntimeDyldImpl::
-resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
-                        bool isPCRel, unsigned Type,
-                        unsigned Size) {
-  // If the relocation is PC-relative, the value to be encoded is the
-  // pointer difference.
-  if (isPCRel)
-    // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
-    // address. Is that expected? Only for branches, perhaps?
-    Value -= Address + 4;
-
-  switch(Type) {
-  default:
-    llvm_unreachable("Invalid relocation type!");
-  case macho::RIT_X86_64_Unsigned:
-  case macho::RIT_X86_64_Branch: {
-    // Mask in the target value a byte at a time (we don't have an alignment
-    // guarantee for the target address, so this is safest).
-    uint8_t *p = (uint8_t*)Address;
-    for (unsigned i = 0; i < Size; ++i) {
-      *p++ = (uint8_t)Value;
-      Value >>= 8;
-    }
-    return false;
-  }
-  case macho::RIT_X86_64_Signed:
-  case macho::RIT_X86_64_GOTLoad:
-  case macho::RIT_X86_64_GOT:
-  case macho::RIT_X86_64_Subtractor:
-  case macho::RIT_X86_64_Signed1:
-  case macho::RIT_X86_64_Signed2:
-  case macho::RIT_X86_64_Signed4:
-  case macho::RIT_X86_64_TLV:
-    return Error("Relocation type not implemented yet!");
-  }
-  return false;
-}
-
-bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
-                                           bool isPCRel, unsigned Type,
-                                           unsigned Size) {
-  // If the relocation is PC-relative, the value to be encoded is the
-  // pointer difference.
-  if (isPCRel) {
-    Value -= Address;
-    // ARM PCRel relocations have an effective-PC offset of two instructions
-    // (four bytes in Thumb mode, 8 bytes in ARM mode).
-    // FIXME: For now, assume ARM mode.
-    Value -= 8;
-  }
-
-  switch(Type) {
-  default:
-    llvm_unreachable("Invalid relocation type!");
-  case macho::RIT_Vanilla: {
-    llvm_unreachable("Invalid relocation type!");
-    // Mask in the target value a byte at a time (we don't have an alignment
-    // guarantee for the target address, so this is safest).
-    uint8_t *p = (uint8_t*)Address;
-    for (unsigned i = 0; i < Size; ++i) {
-      *p++ = (uint8_t)Value;
-      Value >>= 8;
-    }
-    break;
-  }
-  case macho::RIT_ARM_Branch24Bit: {
-    // Mask the value into the target address. We know instructions are
-    // 32-bit aligned, so we can do it all at once.
-    uint32_t *p = (uint32_t*)Address;
-    // The low two bits of the value are not encoded.
-    Value >>= 2;
-    // Mask the value to 24 bits.
-    Value &= 0xffffff;
-    // FIXME: If the destination is a Thumb function (and the instruction
-    // is a non-predicated BL instruction), we need to change it to a BLX
-    // instruction instead.
-
-    // Insert the value into the instruction.
-    *p = (*p & ~0xffffff) | Value;
-    break;
-  }
-  case macho::RIT_ARM_ThumbBranch22Bit:
-  case macho::RIT_ARM_ThumbBranch32Bit:
-  case macho::RIT_ARM_Half:
-  case macho::RIT_ARM_HalfDifference:
-  case macho::RIT_Pair:
-  case macho::RIT_Difference:
-  case macho::RIT_ARM_LocalDifference:
-  case macho::RIT_ARM_PreboundLazyPointer:
-    return Error("Relocation type not implemented yet!");
-  }
-  return false;
-}
-
-bool RuntimeDyldImpl::
-loadSegment32(const MachOObject *Obj,
-              const MachOObject::LoadCommandInfo *SegmentLCI,
-              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
-  InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
-  Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
-  if (!SegmentLC)
-    return Error("unable to load segment load command");
-
-  for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
-    InMemoryStruct<macho::Section> Sect;
-    Obj->ReadSection(*SegmentLCI, SectNum, Sect);
-    if (!Sect)
-      return Error("unable to load section: '" + Twine(SectNum) + "'");
-
-    // FIXME: For the time being, we're only loading text segments.
-    if (Sect->Flags != 0x80000400)
-      continue;
-
-    // Address and names of symbols in the section.
-    typedef std::pair<uint64_t, StringRef> SymbolEntry;
-    SmallVector<SymbolEntry, 64> Symbols;
-    // Index of all the names, in this section or not. Used when we're
-    // dealing with relocation entries.
-    SmallVector<StringRef, 64> SymbolNames;
-    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
-      InMemoryStruct<macho::SymbolTableEntry> STE;
-      Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
-      if (!STE)
-        return Error("unable to read symbol: '" + Twine(i) + "'");
-      if (STE->SectionIndex > SegmentLC->NumSections)
-        return Error("invalid section index for symbol: '" + Twine(i) + "'");
-      // Get the symbol name.
-      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
-      SymbolNames.push_back(Name);
-
-      // Just skip symbols not defined in this section.
-      if ((unsigned)STE->SectionIndex - 1 != SectNum)
-        continue;
-
-      // FIXME: Check the symbol type and flags.
-      if (STE->Type != 0xF)  // external, defined in this section.
-        continue;
-      // Flags == 0x8 marks a thumb function for ARM, which is fine as it
-      // doesn't require any special handling here.
-      if (STE->Flags != 0x0 && STE->Flags != 0x8)
-        continue;
-
-      // Remember the symbol.
-      Symbols.push_back(SymbolEntry(STE->Value, Name));
-
-      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
-            (Sect->Address + STE->Value) << "\n");
-    }
-    // Sort the symbols by address, just in case they didn't come in that way.
-    array_pod_sort(Symbols.begin(), Symbols.end());
-
-    // If there weren't any functions (odd, but just in case...)
-    if (!Symbols.size())
-      continue;
-
-    // Extract the function data.
-    uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
-                                           SegmentLC->FileSize).data();
-    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
-      uint64_t StartOffset = Sect->Address + Symbols[i].first;
-      uint64_t EndOffset = Symbols[i + 1].first - 1;
-      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
-                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
-      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
-    }
-    // The last symbol we do after since the end address is calculated
-    // differently because there is no next symbol to reference.
-    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
-    uint64_t EndOffset = Sect->Size - 1;
-    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
-                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
-    extractFunction(Symbols[Symbols.size()-1].second,
-                    Base + StartOffset, Base + EndOffset);
-
-    // Now extract the relocation information for each function and process it.
-    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
-      InMemoryStruct<macho::RelocationEntry> RE;
-      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
-      if (RE->Word0 & macho::RF_Scattered)
-        return Error("NOT YET IMPLEMENTED: scattered relocations.");
-      // Word0 of the relocation is the offset into the section where the
-      // relocation should be applied. We need to translate that into an
-      // offset into a function since that's our atom.
-      uint32_t Offset = RE->Word0;
-      // Look for the function containing the address. This is used for JIT
-      // code, so the number of functions in section is almost always going
-      // to be very small (usually just one), so until we have use cases
-      // where that's not true, just use a trivial linear search.
-      unsigned SymbolNum;
-      unsigned NumSymbols = Symbols.size();
-      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
-             "No symbol containing relocation!");
-      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
-        if (Symbols[SymbolNum + 1].first > Offset)
-          break;
-      // Adjust the offset to be relative to the symbol.
-      Offset -= Symbols[SymbolNum].first;
-      // Get the name of the symbol containing the relocation.
-      StringRef TargetName = SymbolNames[SymbolNum];
-
-      bool isExtern = (RE->Word1 >> 27) & 1;
-      // Figure out the source symbol of the relocation. If isExtern is true,
-      // this relocation references the symbol table, otherwise it references
-      // a section in the same object, numbered from 1 through NumSections
-      // (SectionBases is [0, NumSections-1]).
-      // FIXME: Some targets (ARM) use internal relocations even for
-      // externally visible symbols, if the definition is in the same
-      // file as the reference. We need to convert those back to by-name
-      // references. We can resolve the address based on the section
-      // offset and see if we have a symbol at that address. If we do,
-      // use that; otherwise, puke.
-      if (!isExtern)
-        return Error("Internal relocations not supported.");
-      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
-      StringRef SourceName = SymbolNames[SourceNum];
-
-      // FIXME: Get the relocation addend from the target address.
-
-      // Now store the relocation information. Associate it with the source
-      // symbol.
-      Relocations[SourceName].push_back(RelocationEntry(TargetName,
-                                                        Offset,
-                                                        RE->Word1,
-                                                        0 /*Addend*/));
-      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
-                   << " from '" << SourceName << "(Word1: "
-                   << format("0x%x", RE->Word1) << ")\n");
-    }
-  }
-  return false;
-}
-
-
-bool RuntimeDyldImpl::
-loadSegment64(const MachOObject *Obj,
-              const MachOObject::LoadCommandInfo *SegmentLCI,
-              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
-  InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
-  Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
-  if (!Segment64LC)
-    return Error("unable to load segment load command");
-
-  for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
-    InMemoryStruct<macho::Section64> Sect;
-    Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
-    if (!Sect)
-      return Error("unable to load section: '" + Twine(SectNum) + "'");
-
-    // FIXME: For the time being, we're only loading text segments.
-    if (Sect->Flags != 0x80000400)
-      continue;
-
-    // Address and names of symbols in the section.
-    typedef std::pair<uint64_t, StringRef> SymbolEntry;
-    SmallVector<SymbolEntry, 64> Symbols;
-    // Index of all the names, in this section or not. Used when we're
-    // dealing with relocation entries.
-    SmallVector<StringRef, 64> SymbolNames;
-    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
-      InMemoryStruct<macho::Symbol64TableEntry> STE;
-      Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
-      if (!STE)
-        return Error("unable to read symbol: '" + Twine(i) + "'");
-      if (STE->SectionIndex > Segment64LC->NumSections)
-        return Error("invalid section index for symbol: '" + Twine(i) + "'");
-      // Get the symbol name.
-      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
-      SymbolNames.push_back(Name);
-
-      // Just skip symbols not defined in this section.
-      if ((unsigned)STE->SectionIndex - 1 != SectNum)
-        continue;
-
-      // FIXME: Check the symbol type and flags.
-      if (STE->Type != 0xF)  // external, defined in this section.
-        continue;
-      if (STE->Flags != 0x0)
-        continue;
-
-      // Remember the symbol.
-      Symbols.push_back(SymbolEntry(STE->Value, Name));
-
-      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
-            (Sect->Address + STE->Value) << "\n");
-    }
-    // Sort the symbols by address, just in case they didn't come in that way.
-    array_pod_sort(Symbols.begin(), Symbols.end());
-
-    // If there weren't any functions (odd, but just in case...)
-    if (!Symbols.size())
-      continue;
-
-    // Extract the function data.
-    uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
-                                           Segment64LC->FileSize).data();
-    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
-      uint64_t StartOffset = Sect->Address + Symbols[i].first;
-      uint64_t EndOffset = Symbols[i + 1].first - 1;
-      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
-                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
-      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
-    }
-    // The last symbol we do after since the end address is calculated
-    // differently because there is no next symbol to reference.
-    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
-    uint64_t EndOffset = Sect->Size - 1;
-    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
-                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
-    extractFunction(Symbols[Symbols.size()-1].second,
-                    Base + StartOffset, Base + EndOffset);
-
-    // Now extract the relocation information for each function and process it.
-    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
-      InMemoryStruct<macho::RelocationEntry> RE;
-      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
-      if (RE->Word0 & macho::RF_Scattered)
-        return Error("NOT YET IMPLEMENTED: scattered relocations.");
-      // Word0 of the relocation is the offset into the section where the
-      // relocation should be applied. We need to translate that into an
-      // offset into a function since that's our atom.
-      uint32_t Offset = RE->Word0;
-      // Look for the function containing the address. This is used for JIT
-      // code, so the number of functions in section is almost always going
-      // to be very small (usually just one), so until we have use cases
-      // where that's not true, just use a trivial linear search.
-      unsigned SymbolNum;
-      unsigned NumSymbols = Symbols.size();
-      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
-             "No symbol containing relocation!");
-      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
-        if (Symbols[SymbolNum + 1].first > Offset)
-          break;
-      // Adjust the offset to be relative to the symbol.
-      Offset -= Symbols[SymbolNum].first;
-      // Get the name of the symbol containing the relocation.
-      StringRef TargetName = SymbolNames[SymbolNum];
-
-      bool isExtern = (RE->Word1 >> 27) & 1;
-      // Figure out the source symbol of the relocation. If isExtern is true,
-      // this relocation references the symbol table, otherwise it references
-      // a section in the same object, numbered from 1 through NumSections
-      // (SectionBases is [0, NumSections-1]).
-      if (!isExtern)
-        return Error("Internal relocations not supported.");
-      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
-      StringRef SourceName = SymbolNames[SourceNum];
-
-      // FIXME: Get the relocation addend from the target address.
-
-      // Now store the relocation information. Associate it with the source
-      // symbol.
-      Relocations[SourceName].push_back(RelocationEntry(TargetName,
-                                                        Offset,
-                                                        RE->Word1,
-                                                        0 /*Addend*/));
-      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
-                   << " from '" << SourceName << "(Word1: "
-                   << format("0x%x", RE->Word1) << ")\n");
-    }
-  }
-  return false;
-}
-
-bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
-  // If the linker is in an error state, don't do anything.
-  if (hasError())
-    return true;
-  // Load the Mach-O wrapper object.
-  std::string ErrorStr;
-  OwningPtr<MachOObject> Obj(
-    MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
-  if (!Obj)
-    return Error("unable to load object: '" + ErrorStr + "'");
-
-  // Get the CPU type information from the header.
-  const macho::Header &Header = Obj->getHeader();
-
-  // FIXME: Error checking that the loaded object is compatible with
-  //        the system we're running on.
-  CPUType = Header.CPUType;
-  CPUSubtype = Header.CPUSubtype;
-
-  // Validate that the load commands match what we expect.
-  const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
-    *DysymtabLCI = 0;
-  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
-    const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
-    switch (LCI.Command.Type) {
-    case macho::LCT_Segment:
-    case macho::LCT_Segment64:
-      if (SegmentLCI)
-        return Error("unexpected input object (multiple segments)");
-      SegmentLCI = &LCI;
-      break;
-    case macho::LCT_Symtab:
-      if (SymtabLCI)
-        return Error("unexpected input object (multiple symbol tables)");
-      SymtabLCI = &LCI;
-      break;
-    case macho::LCT_Dysymtab:
-      if (DysymtabLCI)
-        return Error("unexpected input object (multiple symbol tables)");
-      DysymtabLCI = &LCI;
-      break;
-    default:
-      return Error("unexpected input object (unexpected load command");
-    }
-  }
-
-  if (!SymtabLCI)
-    return Error("no symbol table found in object");
-  if (!SegmentLCI)
-    return Error("no symbol table found in object");
-
-  // Read and register the symbol table data.
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
-  Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
-  if (!SymtabLC)
-    return Error("unable to load symbol table load command");
-  Obj->RegisterStringTable(*SymtabLC);
-
-  // Read the dynamic link-edit information, if present (not present in static
-  // objects).
-  if (DysymtabLCI) {
-    InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
-    Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
-    if (!DysymtabLC)
-      return Error("unable to load dynamic link-exit load command");
-
-    // FIXME: We don't support anything interesting yet.
-//    if (DysymtabLC->LocalSymbolsIndex != 0)
-//      return Error("NOT YET IMPLEMENTED: local symbol entries");
-//    if (DysymtabLC->ExternalSymbolsIndex != 0)
-//      return Error("NOT YET IMPLEMENTED: non-external symbol entries");
-//    if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
-//      return Error("NOT YET IMPLEMENTED: undefined symbol entries");
-  }
-
-  // Load the segment load command.
-  if (SegmentLCI->Command.Type == macho::LCT_Segment) {
-    if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
-      return true;
-  } else {
-    if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
-      return true;
-  }
-
-  return false;
-}
-
 // Resolve the relocations for all symbols we currently know about.
 void RuntimeDyldImpl::resolveRelocations() {
   // Just iterate over the symbols in our symbol table and assign their
@@ -620,35 +51,11 @@ void RuntimeDyldImpl::resolveRelocations() {
     reassignSymbolAddress(i->getKey(), i->getValue());
 }
 
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
-  // Assign the address in our symbol table.
-  SymbolTable[Name] = Addr;
-
-  RelocationList &Relocs = Relocations[Name];
-  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-    RelocationEntry &RE = Relocs[i];
-    uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
-    bool isPCRel = (RE.Data >> 24) & 1;
-    unsigned Type = (RE.Data >> 28) & 0xf;
-    unsigned Size = 1 << ((RE.Data >> 25) & 3);
-
-    DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
-          << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
-          << " from '" << Name << " (" << format("%p", Addr) << ")"
-          << "(" << (isPCRel ? "pcrel" : "absolute")
-          << ", type: " << Type << ", Size: " << Size << ").\n");
-
-    resolveRelocation(Target, Addr, isPCRel, Type, Size);
-    RE.isResolved = true;
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // RuntimeDyld class implementation
-RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
-  Dyld = new RuntimeDyldImpl(MM);
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+  Dyld = 0;
+  MM = mm;
 }
 
 RuntimeDyld::~RuntimeDyld() {
@@ -656,6 +63,16 @@ RuntimeDyld::~RuntimeDyld() {
 }
 
 bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
+  if (!Dyld) {
+    if (RuntimeDyldMachO::isKnownFormat(InputBuffer))
+      Dyld = new RuntimeDyldMachO(MM);
+    else
+      report_fatal_error("Unknown object format!");
+  } else {
+    if(!Dyld->isCompatibleFormat(InputBuffer))
+      report_fatal_error("Incompatible object format!");
+  }
+
   return Dyld->loadObject(InputBuffer);
 }
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
new file mode 100644
index 000000000000..bcdfb04801a5
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -0,0 +1,152 @@
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the implementations of runtime dynamic linker facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_IMPL_H
+#define LLVM_RUNTIME_DYLD_IMPL_H
+
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+class RuntimeDyldImpl {
+protected:
+  unsigned CPUType;
+  unsigned CPUSubtype;
+
+  // The MemoryManager to load objects into.
+  RTDyldMemoryManager *MemMgr;
+
+  // FIXME: This all assumes we're dealing with external symbols for anything
+  //        explicitly referenced. I.e., we can index by name and things
+  //        will work out. In practice, this may not be the case, so we
+  //        should find a way to effectively generalize.
+
+  // For each function, we have a MemoryBlock of its instruction data.
+  StringMap<sys::MemoryBlock> Functions;
+
+  // Master symbol table. As modules are loaded and external symbols are
+  // resolved, their addresses are stored here.
+  StringMap<uint8_t*> SymbolTable;
+
+  bool HasError;
+  std::string ErrorStr;
+
+  // Set the error state and record an error string.
+  bool Error(const Twine &Msg) {
+    ErrorStr = Msg.str();
+    HasError = true;
+    return true;
+  }
+
+  void extractFunction(StringRef Name, uint8_t *StartAddress,
+                       uint8_t *EndAddress);
+
+public:
+  RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
+
+  virtual ~RuntimeDyldImpl();
+
+  virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+
+  void *getSymbolAddress(StringRef Name) {
+    // FIXME: Just look up as a function for now. Overly simple of course.
+    // Work in progress.
+    return SymbolTable.lookup(Name);
+  }
+
+  void resolveRelocations();
+
+  virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0;
+
+  // Is the linker in an error state?
+  bool hasError() { return HasError; }
+
+  // Mark the error condition as handled and continue.
+  void clearError() { HasError = false; }
+
+  // Get the error message.
+  StringRef getErrorString() { return ErrorStr; }
+
+  virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
+};
+
+
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (the JIT re-compiled the function, e.g.),
+  // the relocations get re-resolved.
+  struct RelocationEntry {
+    std::string Target;     // Object this relocation is contained in.
+    uint64_t    Offset;     // Offset into the object for the relocation.
+    uint32_t    Data;       // Second word of the raw macho relocation entry.
+    int64_t     Addend;     // Addend encoded in the instruction itself, if any.
+    bool        isResolved; // Has this relocation been resolved previously?
+
+    RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
+      : Target(t), Offset(offset), Data(data), Addend(addend),
+        isResolved(false) {}
+  };
+  typedef SmallVector<RelocationEntry, 4> RelocationList;
+  StringMap<RelocationList> Relocations;
+
+  // FIXME: Also keep a map of all the relocations contained in an object. Use
+  // this to dynamically answer whether all of the relocations in it have
+  // been resolved or not.
+
+  bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+                         unsigned Type, unsigned Size);
+  bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                               unsigned Type, unsigned Size);
+  bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                            unsigned Type, unsigned Size);
+
+  bool loadSegment32(const MachOObject *Obj,
+                     const MachOObject::LoadCommandInfo *SegmentLCI,
+                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+  bool loadSegment64(const MachOObject *Obj,
+                     const MachOObject::LoadCommandInfo *SegmentLCI,
+                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+
+public:
+  RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+  bool loadObject(MemoryBuffer *InputBuffer);
+
+  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+  static bool isKnownFormat(const MemoryBuffer *InputBuffer);
+
+  bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
+    return isKnownFormat(InputBuffer);
+  };
+};
+
+} // end namespace llvm
+
+
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
new file mode 100644
index 000000000000..623e9b2acca3
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -0,0 +1,524 @@
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "RuntimeDyldImpl.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+bool RuntimeDyldMachO::
+resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+                  unsigned Type, unsigned Size) {
+  // Dispatch to the target-specific routine; unknown CPU types are an error.
+  switch (CPUType) {
+  default: return Error("Unsupported CPU type!"); // Must not fall through.
+  case mach::CTM_x86_64:
+    return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
+                                   isPCRel, Type, Size);
+  case mach::CTM_ARM:
+    return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
+                                isPCRel, Type, Size);
+  }
+  llvm_unreachable("");
+}
+
+bool RuntimeDyldMachO::
+resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                        unsigned Type, unsigned Size) {
+  // Writes the low Size bytes of Value to Address, least significant byte
+  // first, for the relocation types implemented so far. A PC-relative
+  // relocation encodes the pointer difference rather than the raw value.
+  // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+  // address. Is that expected? Only for branches, perhaps?
+  if (isPCRel)
+    Value = Value - Address - 4;
+
+  switch(Type) {
+  case macho::RIT_X86_64_Signed:
+  case macho::RIT_X86_64_GOTLoad:
+  case macho::RIT_X86_64_GOT:
+  case macho::RIT_X86_64_Subtractor:
+  case macho::RIT_X86_64_Signed1:
+  case macho::RIT_X86_64_Signed2:
+  case macho::RIT_X86_64_Signed4:
+  case macho::RIT_X86_64_TLV:
+    return Error("Relocation type not implemented yet!");
+  case macho::RIT_X86_64_Unsigned:
+  case macho::RIT_X86_64_Branch: {
+    // Store byte by byte: nothing guarantees the target address is aligned,
+    // so a single wide store is not safe.
+    uint8_t *Dst = (uint8_t*)Address;
+    for (uint8_t *const End = Dst + Size; Dst != End; ++Dst) {
+      *Dst = (uint8_t)Value;
+      Value >>= 8;
+    }
+    return false;
+  }
+  default:
+    llvm_unreachable("Invalid relocation type!");
+  }
+  return false;
+}
+
+bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
+                                         bool isPCRel, unsigned Type,
+                                         unsigned Size) {
+  // Apply one ARM relocation at Address. If the relocation is PC-relative,
+  // the value to be encoded is the pointer difference.
+  if (isPCRel) {
+    Value -= Address;
+    // ARM PCRel relocations have an effective-PC offset of two instructions
+    // (four bytes in Thumb mode, 8 bytes in ARM mode).
+    // FIXME: For now, assume ARM mode.
+    Value -= 8;
+  }
+
+  switch(Type) {
+  default:
+    llvm_unreachable("Invalid relocation type!");
+  case macho::RIT_Vanilla: {
+    llvm_unreachable("Invalid relocation type!");
+    // NOTE(review): the llvm_unreachable above makes this byte-at-a-time store
+    // (chosen because the target address may be unaligned) dead -- intended?
+    uint8_t *p = (uint8_t*)Address;
+    for (unsigned i = 0; i < Size; ++i) {
+      *p++ = (uint8_t)Value;
+      Value >>= 8;
+    }
+    break;
+  }
+  case macho::RIT_ARM_Branch24Bit: {
+    // Mask the value into the target address. We know instructions are
+    // 32-bit aligned, so we can do it all at once.
+    uint32_t *p = (uint32_t*)Address;
+    // The low two bits of the value are not encoded.
+    Value >>= 2;
+    // Mask the value to 24 bits.
+    Value &= 0xffffff;
+    // FIXME: If the destination is a Thumb function (and the instruction
+    // is a non-predicated BL instruction), we need to change it to a BLX
+    // instruction instead.
+
+    // Insert the value into the instruction, keeping its top 8 bits intact.
+    *p = (*p & ~0xffffff) | Value;
+    break;
+  }
+  case macho::RIT_ARM_ThumbBranch22Bit:
+  case macho::RIT_ARM_ThumbBranch32Bit:
+  case macho::RIT_ARM_Half:
+  case macho::RIT_ARM_HalfDifference:
+  case macho::RIT_Pair:
+  case macho::RIT_Difference:
+  case macho::RIT_ARM_LocalDifference:
+  case macho::RIT_ARM_PreboundLazyPointer:
+    return Error("Relocation type not implemented yet!");
+  }
+  return false; // Reached from the 'break's above.
+}
+
+// Load the text sections of a 32-bit segment: extract each function symbol's
+// bytes and record the external relocations against them. False on success.
+bool RuntimeDyldMachO::
+loadSegment32(const MachOObject *Obj,
+              const MachOObject::LoadCommandInfo *SegmentLCI,
+              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+  InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
+  Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
+  if (!SegmentLC)
+    return Error("unable to load segment load command");
+
+  for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
+    InMemoryStruct<macho::Section> Sect;
+    Obj->ReadSection(*SegmentLCI, SectNum, Sect);
+    if (!Sect)
+      return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+    // FIXME: For the time being, we're only loading text segments.
+    if (Sect->Flags != 0x80000400)
+      continue;
+
+    // Address and names of symbols in the section.
+    typedef std::pair<uint64_t, StringRef> SymbolEntry;
+    SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the names, in this section or not. Used when we're
+    // dealing with relocation entries.
+    SmallVector<StringRef, 64> SymbolNames;
+    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+      InMemoryStruct<macho::SymbolTableEntry> STE;
+      Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
+      if (!STE)
+        return Error("unable to read symbol: '" + Twine(i) + "'");
+      if (STE->SectionIndex > SegmentLC->NumSections)
+        return Error("invalid section index for symbol: '" + Twine(i) + "'");
+      // Get the symbol name.
+      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+      SymbolNames.push_back(Name);
+
+      // Just skip symbols not defined in this section.
+      if ((unsigned)STE->SectionIndex - 1 != SectNum)
+        continue;
+
+      // FIXME: Check the symbol type and flags.
+      if (STE->Type != 0xF)  // external, defined in this section.
+        continue;
+      // Flags == 0x8 marks a thumb function for ARM, which is fine as it
+      // doesn't require any special handling here.
+      if (STE->Flags != 0x0 && STE->Flags != 0x8)
+        continue;
+
+      // Remember the symbol.
+      Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+            (Sect->Address + STE->Value) << "\n");
+    }
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // If there weren't any functions (odd, but just in case...)
+    if (!Symbols.size())
+      continue;
+
+    // Extract the function data.
+    uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
+                                           SegmentLC->FileSize).data();
+    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+      uint64_t StartOffset = Sect->Address + Symbols[i].first;
+      uint64_t EndOffset = Symbols[i + 1].first - 1;
+      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
+      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+    }
+    // The last symbol has no successor, so it ends where the section ends.
+    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+    uint64_t EndOffset = Sect->Size - 1;
+    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
+    extractFunction(Symbols[Symbols.size()-1].second,
+                    Base + StartOffset, Base + EndOffset);
+
+    // Now extract the relocation information for each function and process it.
+    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+      InMemoryStruct<macho::RelocationEntry> RE;
+      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+      if (!RE)
+        return Error("unable to read relocation: '" + Twine(j) + "'");
+      if (RE->Word0 & macho::RF_Scattered)
+        return Error("NOT YET IMPLEMENTED: scattered relocations.");
+      // Word0 of the relocation is the section offset where the relocation is
+      // applied. Translate it into a function offset since that's our atom.
+      uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. JIT code almost always
+      // has very few functions per section (usually just one), so until that
+      // stops being true, a trivial linear search is enough.
+      unsigned SymbolNum;
+      unsigned NumSymbols = Symbols.size();
+      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+             "No symbol containing relocation!");
+      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+        if (Symbols[SymbolNum + 1].first > Offset)
+          break;
+      // Adjust the offset to be relative to the symbol.
+      Offset -= Symbols[SymbolNum].first;
+      // Containing symbol's name. SymbolNum indexes Symbols, not SymbolNames.
+      StringRef TargetName = Symbols[SymbolNum].second;
+
+      bool isExtern = (RE->Word1 >> 27) & 1;
+      // Figure out the source symbol of the relocation. If isExtern is set, it
+      // references the symbol table; otherwise it references a section in the
+      // same object, numbered 1 through NumSections (SectionBases is 0-based).
+      // FIXME: Some targets (ARM) use internal relocations even for
+      // externally visible symbols, if the definition is in the same
+      // file as the reference. We need to convert those back to by-name
+      // references. We can resolve the address based on the section
+      // offset and see if we have a symbol at that address. If we do,
+      // use that; otherwise, puke.
+      if (!isExtern)
+        return Error("Internal relocations not supported.");
+      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+      StringRef SourceName = SymbolNames[SourceNum];
+
+      // FIXME: Get the relocation addend from the target address.
+
+      // Now store the relocation information. Associate it with the source
+      // symbol.
+      Relocations[SourceName].push_back(RelocationEntry(TargetName,
+                                                        Offset,
+                                                        RE->Word1,
+                                                        0 /*Addend*/));
+      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "(Word1: "
+                   << format("0x%x", RE->Word1) << ")\n");
+    }
+  }
+  return false;
+}
+
+
+// Load the text sections of a 64-bit segment: extract each function symbol's
+// bytes and record the external relocations against them. False on success.
+bool RuntimeDyldMachO::
+loadSegment64(const MachOObject *Obj,
+              const MachOObject::LoadCommandInfo *SegmentLCI,
+              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+  InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
+  Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
+  if (!Segment64LC)
+    return Error("unable to load segment load command");
+
+  for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
+    InMemoryStruct<macho::Section64> Sect;
+    Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
+    if (!Sect)
+      return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+    // FIXME: For the time being, we're only loading text segments.
+    if (Sect->Flags != 0x80000400)
+      continue;
+
+    // Address and names of symbols in the section.
+    typedef std::pair<uint64_t, StringRef> SymbolEntry;
+    SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the names, in this section or not. Used when we're
+    // dealing with relocation entries.
+    SmallVector<StringRef, 64> SymbolNames;
+    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+      InMemoryStruct<macho::Symbol64TableEntry> STE;
+      Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
+      if (!STE)
+        return Error("unable to read symbol: '" + Twine(i) + "'");
+      if (STE->SectionIndex > Segment64LC->NumSections)
+        return Error("invalid section index for symbol: '" + Twine(i) + "'");
+      // Get the symbol name.
+      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+      SymbolNames.push_back(Name);
+
+      // Just skip symbols not defined in this section.
+      if ((unsigned)STE->SectionIndex - 1 != SectNum)
+        continue;
+
+      // FIXME: Check the symbol type and flags.
+      if (STE->Type != 0xF)  // external, defined in this section.
+        continue;
+      if (STE->Flags != 0x0)
+        continue;
+
+      // Remember the symbol.
+      Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+            (Sect->Address + STE->Value) << "\n");
+    }
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+
+    // If there weren't any functions (odd, but just in case...)
+    if (!Symbols.size())
+      continue;
+
+    // Extract the function data.
+    uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
+                                           Segment64LC->FileSize).data();
+    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+      uint64_t StartOffset = Sect->Address + Symbols[i].first;
+      uint64_t EndOffset = Symbols[i + 1].first - 1;
+      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
+      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+    }
+    // The last symbol has no successor, so it ends where the section ends.
+    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+    uint64_t EndOffset = Sect->Size - 1;
+    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
+    extractFunction(Symbols[Symbols.size()-1].second,
+                    Base + StartOffset, Base + EndOffset);
+
+    // Now extract the relocation information for each function and process it.
+    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+      InMemoryStruct<macho::RelocationEntry> RE;
+      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+      if (!RE)
+        return Error("unable to read relocation: '" + Twine(j) + "'");
+      if (RE->Word0 & macho::RF_Scattered)
+        return Error("NOT YET IMPLEMENTED: scattered relocations.");
+      // Word0 of the relocation is the section offset where the relocation is
+      // applied. Translate it into a function offset since that's our atom.
+      uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. JIT code almost always
+      // has very few functions per section (usually just one), so until that
+      // stops being true, a trivial linear search is enough.
+      unsigned SymbolNum;
+      unsigned NumSymbols = Symbols.size();
+      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+             "No symbol containing relocation!");
+      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+        if (Symbols[SymbolNum + 1].first > Offset)
+          break;
+      // Adjust the offset to be relative to the symbol.
+      Offset -= Symbols[SymbolNum].first;
+      // Containing symbol's name. SymbolNum indexes Symbols, not SymbolNames.
+      StringRef TargetName = Symbols[SymbolNum].second;
+
+      bool isExtern = (RE->Word1 >> 27) & 1;
+      // Figure out the source symbol of the relocation. If isExtern is set, it
+      // references the symbol table; otherwise it references a section in the
+      // same object, numbered 1 through NumSections (SectionBases is 0-based).
+      if (!isExtern)
+        return Error("Internal relocations not supported.");
+      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+      StringRef SourceName = SymbolNames[SourceNum];
+
+      // FIXME: Get the relocation addend from the target address.
+
+      // Now store the relocation information. Associate it with the source
+      // symbol.
+      Relocations[SourceName].push_back(RelocationEntry(TargetName,
+                                                        Offset,
+                                                        RE->Word1,
+                                                        0 /*Addend*/));
+      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "(Word1: "
+                   << format("0x%x", RE->Word1) << ")\n");
+    }
+  }
+  return false;
+}
+
+// Parse a Mach-O object from InputBuffer and load its single segment. Only
+// one segment, symtab and dysymtab command each is accepted. False on success.
+bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
+  // If the linker is in an error state, don't do anything.
+  if (hasError())
+    return true;
+  // Load the Mach-O wrapper object.
+  std::string ErrorStr;
+  OwningPtr<MachOObject> Obj(
+    MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
+  if (!Obj)
+    return Error("unable to load object: '" + ErrorStr + "'");
+
+  // Get the CPU type information from the header.
+  const macho::Header &Header = Obj->getHeader();
+
+  // FIXME: Check that the loaded object is compatible with the host system.
+  CPUType = Header.CPUType;
+  CPUSubtype = Header.CPUSubtype;
+
+  // Validate that the load commands match what we expect.
+  const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+    *DysymtabLCI = 0;
+  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+    const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+    switch (LCI.Command.Type) {
+    case macho::LCT_Segment:
+    case macho::LCT_Segment64:
+      if (SegmentLCI)
+        return Error("unexpected input object (multiple segments)");
+      SegmentLCI = &LCI;
+      break;
+    case macho::LCT_Symtab:
+      if (SymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      SymtabLCI = &LCI;
+      break;
+    case macho::LCT_Dysymtab:
+      if (DysymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      DysymtabLCI = &LCI;
+      break;
+    default:
+      return Error("unexpected input object (unexpected load command)");
+    }
+  }
+
+  if (!SymtabLCI)
+    return Error("no symbol table found in object");
+  if (!SegmentLCI)
+    return Error("no segment found in object");
+
+  // Read and register the symbol table data.
+  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+  Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+  if (!SymtabLC)
+    return Error("unable to load symbol table load command");
+  Obj->RegisterStringTable(*SymtabLC);
+
+  // Read the dynamic link-edit info, if present (absent in static objects).
+  if (DysymtabLCI) {
+    InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+    Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+    if (!DysymtabLC)
+      return Error("unable to load dynamic link-edit load command");
+
+    // FIXME: We don't support anything interesting yet.
+//    if (DysymtabLC->LocalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: local symbol entries");
+//    if (DysymtabLC->ExternalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+//    if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+//      return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+  }
+
+  // Load the segment load command.
+  if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+    if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  } else {
+    if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  }
+
+  return false;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+  SymbolTable[Name] = Addr;
+
+  RelocationList &Relocs = Relocations[Name];
+  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+    RelocationEntry &RE = Relocs[i];
+    uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
+    // Unpack Data: bit 24 pcrel, bits 25-26 log2(size), bits 28-31 type.
+    bool isPCRel = (RE.Data >> 24) & 1;
+    unsigned Type = (RE.Data >> 28) & 0xf;
+    unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+    DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+          << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+          << " from '" << Name << " (" << format("%p", Addr) << ")"
+          << "(" << (isPCRel ? "pcrel" : "absolute")
+          << ", type: " << Type << ", Size: " << Size << ").\n");
+
+    // resolveRelocation returns false on success; only then is it resolved.
+    RE.isResolved = !resolveRelocation(Target, Addr, isPCRel, Type, Size);
+  }
+}
+
+bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+  // Accept the Mach-O magics 0xFEEDFACE and 0xFEEDFACF in either byte order.
+  const StringRef Head = InputBuffer->getBuffer().slice(0, 4);
+  return Head == "\xFE\xED\xFA\xCE" ||
+         Head == "\xCE\xFA\xED\xFE" ||
+         Head == "\xFE\xED\xFA\xCF" ||
+         Head == "\xCF\xFA\xED\xFE";
+}
+
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index a8822e58d40f..f51aff3603b8 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -16,10 +16,10 @@
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/Module.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Host.h"
-#include "llvm/Target/SubtargetFeature.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
@@ -75,9 +75,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
 
   // Package up features to be passed to target/subtarget
   std::string FeaturesStr;
-  if (!MCPU.empty() || !MAttrs.empty()) {
+  if (!MAttrs.empty()) {
     SubtargetFeatures Features;
-    Features.setCPU(MCPU);
     for (unsigned i = 0; i != MAttrs.size(); ++i)
       Features.AddFeature(MAttrs[i]);
     FeaturesStr = Features.getString();
@@ -85,7 +84,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
 
   // Allocate a target...
   TargetMachine *Target =
-    TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
+    TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr);
   assert(Target && "Could not allocate target machine!");
   return Target;
 }
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index f372db2403c9..55aa9bf18887 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -9,337 +9,404 @@
 //
 // This file implements the LLVM module linker.
 //
-// Specifically, this:
-//  * Merges global variables between the two modules
-//    * Uninit + Uninit = Init, Init + Uninit = Init, Init + Init = Error if !=
-//  * Merges functions between two modules
-//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/ADT/DenseMap.h"
 using namespace llvm;
 
-// Error - Simple wrapper function to conditionally assign to E and return true.
-// This just makes error return conditions a little bit simpler...
-static inline bool Error(std::string *E, const Twine &Message) {
-  if (E) *E = Message.str();
-  return true;
-}
-
-// Function: ResolveTypes()
-//
-// Description:
-//  Attempt to link the two specified types together.
-//
-// Inputs:
-//  DestTy - The type to which we wish to resolve.
-//  SrcTy  - The original type which we want to resolve.
-//
-// Outputs:
-//  DestST - The symbol table in which the new type should be placed.
-//
-// Return value:
-//  true  - There is an error and the types cannot yet be linked.
-//  false - No errors.
-//
-static bool ResolveTypes(const Type *DestTy, const Type *SrcTy) {
-  if (DestTy == SrcTy) return false;       // If already equal, noop
-  assert(DestTy && SrcTy && "Can't handle null types");
-
-  if (const OpaqueType *OT = dyn_cast<OpaqueType>(DestTy)) {
-    // Type _is_ in module, just opaque...
-    const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(SrcTy);
-  } else if (const OpaqueType *OT = dyn_cast<OpaqueType>(SrcTy)) {
-    const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(DestTy);
-  } else {
-    return true;  // Cannot link types... not-equal and neither is opaque.
-  }
-  return false;
-}
+//===----------------------------------------------------------------------===//
+// TypeMap implementation.
+//===----------------------------------------------------------------------===//
 
-/// LinkerTypeMap - This implements a map of types that is stable
-/// even if types are resolved/refined to other types.  This is not a general
-/// purpose map, it is specific to the linker's use.
 namespace {
-class LinkerTypeMap : public AbstractTypeUser {
-  typedef DenseMap<const Type*, PATypeHolder> TheMapTy;
-  TheMapTy TheMap;
-
-  LinkerTypeMap(const LinkerTypeMap&); // DO NOT IMPLEMENT
-  void operator=(const LinkerTypeMap&); // DO NOT IMPLEMENT
+class TypeMapTy : public ValueMapTypeRemapper {
+  /// MappedTypes - This is a mapping from a source type to a destination type
+  /// to use.
+  DenseMap<Type*, Type*> MappedTypes;
+
+  /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic,
+  /// we speculatively add types to MappedTypes, but keep track of them here in
+  /// case we need to roll back.
+  SmallVector<Type*, 16> SpeculativeTypes;
+  
+  /// DefinitionsToResolve - This is a list of non-opaque structs in the source
+  /// module that are mapped to an opaque struct in the destination module.
+  SmallVector<StructType*, 16> DefinitionsToResolve;
 public:
-  LinkerTypeMap() {}
-  ~LinkerTypeMap() {
-    for (DenseMap<const Type*, PATypeHolder>::iterator I = TheMap.begin(),
-         E = TheMap.end(); I != E; ++I)
-      I->first->removeAbstractTypeUser(this);
-  }
-
-  /// lookup - Return the value for the specified type or null if it doesn't
-  /// exist.
-  const Type *lookup(const Type *Ty) const {
-    TheMapTy::const_iterator I = TheMap.find(Ty);
-    if (I != TheMap.end()) return I->second;
-    return 0;
-  }
-
-  /// insert - This returns true if the pointer was new to the set, false if it
-  /// was already in the set.
-  bool insert(const Type *Src, const Type *Dst) {
-    if (!TheMap.insert(std::make_pair(Src, PATypeHolder(Dst))).second)
-      return false;  // Already in map.
-    if (Src->isAbstract())
-      Src->addAbstractTypeUser(this);
-    return true;
-  }
-
-protected:
-  /// refineAbstractType - The callback method invoked when an abstract type is
-  /// resolved to another type.  An object must override this method to update
-  /// its internal state to reference NewType instead of OldType.
-  ///
-  virtual void refineAbstractType(const DerivedType *OldTy,
-                                  const Type *NewTy) {
-    TheMapTy::iterator I = TheMap.find(OldTy);
-    const Type *DstTy = I->second;
-
-    TheMap.erase(I);
-    if (OldTy->isAbstract())
-      OldTy->removeAbstractTypeUser(this);
-
-    // Don't reinsert into the map if the key is concrete now.
-    if (NewTy->isAbstract())
-      insert(NewTy, DstTy);
+  
+  /// addTypeMapping - Indicate that the specified type in the destination
+  /// module is conceptually equivalent to the specified type in the source
+  /// module.
+  void addTypeMapping(Type *DstTy, Type *SrcTy);
+
+  /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+  /// module from a type definition in the source module.
+  void linkDefinedTypeBodies();
+  
+  /// get - Return the mapped type to use for the specified input type from the
+  /// source module.
+  Type *get(Type *SrcTy);
+
+  FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+
+private:
+  Type *getImpl(Type *T);
+  /// remapType - Implement the ValueMapTypeRemapper interface.
+  Type *remapType(Type *SrcTy) {
+    return get(SrcTy);
   }
+  
+  bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
+};
+}
 
-  /// The other case which AbstractTypeUsers must be aware of is when a type
-  /// makes the transition from being abstract (where it has clients on it's
-  /// AbstractTypeUsers list) to concrete (where it does not).  This method
-  /// notifies ATU's when this occurs for a type.
-  virtual void typeBecameConcrete(const DerivedType *AbsTy) {
-    TheMap.erase(AbsTy);
-    AbsTy->removeAbstractTypeUser(this);
+void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
+  Type *&Entry = MappedTypes[SrcTy];
+  if (Entry) return;
+  
+  if (DstTy == SrcTy) {
+    Entry = DstTy;
+    return;
   }
-
-  // for debugging...
-  virtual void dump() const {
-    dbgs() << "AbstractTypeSet!\n";
+  
+  // Check to see if these types are recursively isomorphic and establish a
+  // mapping between them if so.
+  if (!areTypesIsomorphic(DstTy, SrcTy)) {
+    // Oops, they aren't isomorphic.  Just discard this request by rolling out
+    // any speculative mappings we've established.
+    for (unsigned i = 0, e = SpeculativeTypes.size(); i != e; ++i)
+      MappedTypes.erase(SpeculativeTypes[i]);
   }
-};
+  SpeculativeTypes.clear();
 }
 
-
-// RecursiveResolveTypes - This is just like ResolveTypes, except that it
-// recurses down into derived types, merging the used types if the parent types
-// are compatible.
-static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy,
-                                   LinkerTypeMap &Pointers) {
-  if (DstTy == SrcTy) return false;       // If already equal, noop
-
-  // If we found our opaque type, resolve it now!
-  if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy())
-    return ResolveTypes(DstTy, SrcTy);
-
-  // Two types cannot be resolved together if they are of different primitive
-  // type.  For example, we cannot resolve an int to a float.
-  if (DstTy->getTypeID() != SrcTy->getTypeID()) return true;
-
-  // If neither type is abstract, then they really are just different types.
-  if (!DstTy->isAbstract() && !SrcTy->isAbstract())
-    return true;
-
-  // Otherwise, resolve the used type used by this derived type...
-  switch (DstTy->getTypeID()) {
-  default:
+/// areTypesIsomorphic - Recursively walk this pair of types, returning true
+/// if they are isomorphic.  Mappings made on the way go into MappedTypes.
+bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
+  // Two types with differing kinds are clearly not isomorphic.
+  if (DstTy->getTypeID() != SrcTy->getTypeID()) return false;
+
+  // If we have an entry in the MappedTypes table, then we have our answer.
+  Type *&Entry = MappedTypes[SrcTy];
+  if (Entry)
+    return Entry == DstTy;
+
+  // Two identical types are clearly isomorphic.  Remember this
+  // non-speculatively.
+  if (DstTy == SrcTy) {
+    Entry = DstTy;
     return true;
-  case Type::FunctionTyID: {
-    const FunctionType *DstFT = cast<FunctionType>(DstTy);
-    const FunctionType *SrcFT = cast<FunctionType>(SrcTy);
-    if (DstFT->isVarArg() != SrcFT->isVarArg() ||
-        DstFT->getNumContainedTypes() != SrcFT->getNumContainedTypes())
-      return true;
-
-    // Use TypeHolder's so recursive resolution won't break us.
-    PATypeHolder ST(SrcFT), DT(DstFT);
-    for (unsigned i = 0, e = DstFT->getNumContainedTypes(); i != e; ++i) {
-      const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i);
-      if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers))
-        return true;
-    }
-    return false;
   }
-  case Type::StructTyID: {
-    const StructType *DstST = cast<StructType>(DstTy);
-    const StructType *SrcST = cast<StructType>(SrcTy);
-    if (DstST->getNumContainedTypes() != SrcST->getNumContainedTypes())
+  
+  // Okay, we have two types with identical kinds that we haven't seen before.
+
+  // If this is an opaque struct type, special case it.
+  if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
+    // Mapping an opaque type to any struct, just keep the dest struct.
+    if (SSTy->isOpaque()) {
+      Entry = DstTy;
+      SpeculativeTypes.push_back(SrcTy);
       return true;
+    }
 
-    PATypeHolder ST(SrcST), DT(DstST);
-    for (unsigned i = 0, e = DstST->getNumContainedTypes(); i != e; ++i) {
-      const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i);
-      if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers))
-        return true;
+    // Mapping a non-opaque source type to an opaque dest.  Keep the dest, but
+    // fill it in later.  This doesn't need to be speculative.
+    if (cast<StructType>(DstTy)->isOpaque()) {
+      Entry = DstTy;
+      DefinitionsToResolve.push_back(SSTy);
+      return true;
     }
-    return false;
-  }
-  case Type::ArrayTyID: {
-    const ArrayType *DAT = cast<ArrayType>(DstTy);
-    const ArrayType *SAT = cast<ArrayType>(SrcTy);
-    if (DAT->getNumElements() != SAT->getNumElements()) return true;
-    return RecursiveResolveTypesI(DAT->getElementType(), SAT->getElementType(),
-                                  Pointers);
   }
-  case Type::VectorTyID: {
-    const VectorType *DVT = cast<VectorType>(DstTy);
-    const VectorType *SVT = cast<VectorType>(SrcTy);
-    if (DVT->getNumElements() != SVT->getNumElements()) return true;
-    return RecursiveResolveTypesI(DVT->getElementType(), SVT->getElementType(),
-                                  Pointers);
+  
+  // If the number of subtypes disagree between the two types, then we fail.
+  if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
+    return false;
+  
+  // Fail if any of the extra properties (e.g. array size) of the type disagree.
+  // SrcTy has the same TypeID as DstTy here, so each cast<> below is safe.
+  if (isa<IntegerType>(DstTy))
+    return false;  // bitwidth disagrees.
+  if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+    if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+      return false;
+  } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+    if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+      return false;
+  } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+    StructType *SSTy = cast<StructType>(SrcTy);
+    if (DSTy->isAnonymous() != SSTy->isAnonymous() ||
+        DSTy->isPacked() != SSTy->isPacked())
+      return false;
+  } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+    if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+      return false;
+  } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+    if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+      return false;
   }
-  case Type::PointerTyID: {
-    const PointerType *DstPT = cast<PointerType>(DstTy);
-    const PointerType *SrcPT = cast<PointerType>(SrcTy);
 
-    if (DstPT->getAddressSpace() != SrcPT->getAddressSpace())
-      return true;
+  // Otherwise, we speculate that these two types will line up and recursively
+  // check the subelements.
+  Entry = DstTy;
+  SpeculativeTypes.push_back(SrcTy);
+
+  for (unsigned i = 0, e = SrcTy->getNumContainedTypes(); i != e; ++i)
+    if (!areTypesIsomorphic(DstTy->getContainedType(i),
+                            SrcTy->getContainedType(i)))
+      return false;
+  
+  // If everything seems to have lined up, then everything is great.
+  return true;
+}
 
-    // If this is a pointer type, check to see if we have already seen it.  If
-    // so, we are in a recursive branch.  Cut off the search now.  We cannot use
-    // an associative container for this search, because the type pointers (keys
-    // in the container) change whenever types get resolved.
-    if (SrcPT->isAbstract())
-      if (const Type *ExistingDestTy = Pointers.lookup(SrcPT))
-        return ExistingDestTy != DstPT;
-
-    if (DstPT->isAbstract())
-      if (const Type *ExistingSrcTy = Pointers.lookup(DstPT))
-        return ExistingSrcTy != SrcPT;
-    // Otherwise, add the current pointers to the vector to stop recursion on
-    // this pair.
-    if (DstPT->isAbstract())
-      Pointers.insert(DstPT, SrcPT);
-    if (SrcPT->isAbstract())
-      Pointers.insert(SrcPT, DstPT);
-
-    return RecursiveResolveTypesI(DstPT->getElementType(),
-                                  SrcPT->getElementType(), Pointers);
-  }
+/// linkDefinedTypeBodies - Drain DefinitionsToResolve, giving each opaque
+/// dest struct a body (and possibly a name) from its source definition.
+void TypeMapTy::linkDefinedTypeBodies() {
+  SmallVector<Type*, 16> Elements;
+  SmallString<16> TmpName;
+  
+  // Note that processing entries in this loop (calling 'getImpl') can add new
+  // entries to the DefinitionsToResolve vector.
+  while (!DefinitionsToResolve.empty()) {
+    StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+    StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+    
+    // TypeMap is a many-to-one mapping, if there were multiple types that
+    // provide a body for DstSTy then previous iterations of this loop may have
+    // already handled it.  Just ignore this case.
+    if (!DstSTy->isOpaque()) continue;
+    assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
+    
+    // Map the body of the source type over to a new body for the dest type.
+    Elements.resize(SrcSTy->getNumElements());
+    for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+      Elements[i] = getImpl(SrcSTy->getElementType(i));
+    
+    DstSTy->setBody(Elements, SrcSTy->isPacked());
+    
+    // If DstSTy has no name or has a longer name than SrcSTy, then viciously
+    // steal SrcSTy's name.
+    if (!SrcSTy->hasName()) continue;
+    StringRef SrcName = SrcSTy->getName();
+    // Copy via TmpName, presumably because SrcName points into SrcSTy's name.
+    if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) {
+      TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end());
+      SrcSTy->setName("");
+      DstSTy->setName(TmpName.str());
+      TmpName.clear();
+    }
   }
 }
 
-static bool RecursiveResolveTypes(const Type *DestTy, const Type *SrcTy) {
-  LinkerTypeMap PointerTypes;
-  return RecursiveResolveTypesI(DestTy, SrcTy, PointerTypes);
-}
 
+/// get - Return the mapped type to use for the specified input type from the
+/// source module.  Any struct bodies queued by the lookup are linked first.
+Type *TypeMapTy::get(Type *Ty) {
+  Type *Result = getImpl(Ty);
+  
+  // If getImpl queued any struct definitions, resolve them before returning.
+  if (!DefinitionsToResolve.empty())
+    linkDefinedTypeBodies();
+  return Result;
+}
 
-// LinkTypes - Go through the symbol table of the Src module and see if any
-// types are named in the src module that are not named in the Dst module.
-// Make sure there are no type name conflicts.
-static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
-        TypeSymbolTable *DestST = &Dest->getTypeSymbolTable();
-  const TypeSymbolTable *SrcST  = &Src->getTypeSymbolTable();
-
-  // Look for a type plane for Type's...
-  TypeSymbolTable::const_iterator TI = SrcST->begin();
-  TypeSymbolTable::const_iterator TE = SrcST->end();
-  if (TI == TE) return false;  // No named types, do nothing.
-
-  // Some types cannot be resolved immediately because they depend on other
-  // types being resolved to each other first.  This contains a list of types we
-  // are waiting to recheck.
-  std::vector<std::string> DelayedTypesToResolve;
-
-  for ( ; TI != TE; ++TI ) {
-    const std::string &Name = TI->first;
-    const Type *RHS = TI->second;
-
-    // Check to see if this type name is already in the dest module.
-    Type *Entry = DestST->lookup(Name);
-
-    // If the name is just in the source module, bring it over to the dest.
-    if (Entry == 0) {
-      if (!Name.empty())
-        DestST->insert(Name, const_cast<Type*>(RHS));
-    } else if (ResolveTypes(Entry, RHS)) {
-      // They look different, save the types 'till later to resolve.
-      DelayedTypesToResolve.push_back(Name);
+/// getImpl - Recursive worker for get(); may append to DefinitionsToResolve.
+Type *TypeMapTy::getImpl(Type *Ty) {
+  // If we already have an entry for this type, return it.
+  Type **Entry = &MappedTypes[Ty];
+  if (*Entry) return *Entry;
+  
+  // If this is not a named struct type, then just map all of the elements and
+  // then rebuild the type from inside out.
+  if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) {
+    // If there are no element types to map, then the type is itself.  This is
+    // true for the anonymous {} struct, things like 'float', integers, etc.
+    if (Ty->getNumContainedTypes() == 0)
+      return *Entry = Ty;
+    
+    // Remap all of the elements, keeping track of whether any of them change.
+    bool AnyChange = false;
+    SmallVector<Type*, 4> ElementTypes;
+    ElementTypes.resize(Ty->getNumContainedTypes());
+    for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i) {
+      ElementTypes[i] = getImpl(Ty->getContainedType(i));
+      AnyChange |= ElementTypes[i] != Ty->getContainedType(i);
+    }
+    
+    // Re-lookup: recursion may have grown MappedTypes or already mapped Ty.
+    Entry = &MappedTypes[Ty];
+    if (*Entry) return *Entry;
+    
+    // If all of the element types mapped directly over, then the type is usable
+    // as-is.
+    if (!AnyChange)
+      return *Entry = Ty;
+    
+    // Otherwise, rebuild a modified type.
+    switch (Ty->getTypeID()) {
+    default: assert(0 && "unknown derived type to remap");
+    case Type::ArrayTyID:  // NOTE: 'default' falls through to here in NDEBUG.
+      return *Entry = ArrayType::get(ElementTypes[0],
+                                     cast<ArrayType>(Ty)->getNumElements());
+    case Type::VectorTyID: 
+      return *Entry = VectorType::get(ElementTypes[0],
+                                      cast<VectorType>(Ty)->getNumElements());
+    case Type::PointerTyID:
+      return *Entry = PointerType::get(ElementTypes[0],
+                                      cast<PointerType>(Ty)->getAddressSpace());
+    case Type::FunctionTyID:
+      return *Entry = FunctionType::get(ElementTypes[0],
+                                        ArrayRef<Type*>(ElementTypes).slice(1),
+                                        cast<FunctionType>(Ty)->isVarArg());
+    case Type::StructTyID:
+      // Note that this is only reached for anonymous structs.
+      return *Entry = StructType::get(Ty->getContext(), ElementTypes,
+                                      cast<StructType>(Ty)->isPacked());
     }
   }
 
-  // Iteratively resolve types while we can...
-  while (!DelayedTypesToResolve.empty()) {
-    // Loop over all of the types, attempting to resolve them if possible...
-    unsigned OldSize = DelayedTypesToResolve.size();
-
-    // Try direct resolution by name...
-    for (unsigned i = 0; i != DelayedTypesToResolve.size(); ++i) {
-      const std::string &Name = DelayedTypesToResolve[i];
-      Type *T1 = SrcST->lookup(Name);
-      Type *T2 = DestST->lookup(Name);
-      if (!ResolveTypes(T2, T1)) {
-        // We are making progress!
-        DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i);
-        --i;
-      }
-    }
+  // Otherwise, this is an unmapped named struct.  If the struct can be directly
+  // mapped over, just use it as-is.  This happens in a case when the linked-in
+  // module has something like:
+  //   %T = type {%T*, i32}
+  //   @GV = global %T* null
+  // where T does not exist at all in the destination module.
+  //
+  // The other case we watch for is when the type is not in the destination
+  // module, but that it has to be rebuilt because it refers to something that
+  // is already mapped.  For example, if the destination module has:
+  //  %A = type { i32 }
+  // and the source module has something like
+  //  %A' = type { i32 }
+  //  %B = type { %A'* }
+  //  @GV = global %B* null
+  // then we want to create a new type: "%B = type { %A*}" and have it take the
+  // pristine "%B" name from the source module.
+  //
+  // To determine which case this is, we have to recursively walk the type graph
+  // speculating that we'll be able to reuse it unmodified.  Only if this is
+  // safe would we map the entire thing over.  Because this is an optimization,
+  // and is not required for the prettiness of the linked module, we just skip
+  // it and always rebuild a type here.
+  StructType *STy = cast<StructType>(Ty);
+  
+  // If the type is opaque, we can just use it directly.
+  if (STy->isOpaque())
+    return *Entry = STy;
+  
+  // Otherwise we create a new, unnamed type and resolve its body later.  This
+  // will be resolved by the top level of get() via linkDefinedTypeBodies().
+  DefinitionsToResolve.push_back(STy);
+  return *Entry = StructType::createNamed(STy->getContext(), "");
+}
 
-    // Did we not eliminate any types?
-    if (DelayedTypesToResolve.size() == OldSize) {
-      // Attempt to resolve subelements of types.  This allows us to merge these
-      // two types: { int* } and { opaque* }
-      for (unsigned i = 0, e = DelayedTypesToResolve.size(); i != e; ++i) {
-        const std::string &Name = DelayedTypesToResolve[i];
-        if (!RecursiveResolveTypes(SrcST->lookup(Name), DestST->lookup(Name))) {
-          // We are making progress!
-          DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i);
-
-          // Go back to the main loop, perhaps we can resolve directly by name
-          // now...
-          break;
-        }
-      }
 
-      // If we STILL cannot resolve the types, then there is something wrong.
-      if (DelayedTypesToResolve.size() == OldSize) {
-        // Remove the symbol name from the destination.
-        DelayedTypesToResolve.pop_back();
-      }
-    }
-  }
 
+//===----------------------------------------------------------------------===//
+// ModuleLinker implementation.
+//===----------------------------------------------------------------------===//
 
-  return false;
+namespace {
+  /// ModuleLinker - This is an implementation class for the LinkModules
+  /// function, which is the entrypoint for this file.
+  class ModuleLinker {
+    Module *DstM, *SrcM;  // Destination module and the module linked into it.
+    
+    TypeMapTy TypeMap;  // Maps source-module types to destination types.
+
+    /// ValueMap - Mapping of values from what they used to be in Src, to what
+    /// they are now in DstM.  ValueToValueMapTy is a ValueMap, which involves
+    /// some overhead due to the use of Value handles which the Linker doesn't
+    /// actually need, but this allows us to reuse the ValueMapper code.
+    ValueToValueMapTy ValueMap;
+    
+    struct AppendingVarInfo {
+      GlobalVariable *NewGV;  // New aggregate global in dest module.
+      Constant *DstInit;      // Old initializer from dest module.
+      Constant *SrcInit;      // Old initializer from src module.
+    };
+    /// AppendingVars - Appending globals queued by linkAppendingVarProto.
+    std::vector<AppendingVarInfo> AppendingVars;
+    
+  public:
+    std::string ErrorMsg;  // Last message recorded by emitError.
+    
+    ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { }
+    
+    bool run();
+    
+  private:
+    /// emitError - Helper method for setting a message and returning an error
+    /// code.
+    bool emitError(const Twine &Message) {
+      ErrorMsg = Message.str();
+      return true;  // 'true' is the error result, so callers can return this.
+    }
+    
+    /// getLinkageResult - This analyzes the two global values and determines
+    /// what the result will look like in the destination module.
+    bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+                          GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+
+    /// getLinkedToGlobal - Given a global in the source module, return the
+    /// global in the destination module that is being linked to, if any.
+    GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+      // If the source has no name it can't link.  If it has local linkage,
+      // there is no name match-up going on.
+      if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+        return 0;
+      
+      // Otherwise see if we have a match in the destination module's symtab.
+      GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+      if (DGV == 0) return 0;
+        
+      // If we found a global with the same name in the dest module, but it has
+      // internal linkage, we are really not doing any linkage here.
+      if (DGV->hasLocalLinkage())
+        return 0;
+
+      // Otherwise, we do in fact link to the destination global.
+      return DGV;
+    }
+    
+    void computeTypeMapping();
+    
+    bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
+    bool linkGlobalProto(GlobalVariable *SrcGV);
+    bool linkFunctionProto(Function *SrcF);
+    bool linkAliasProto(GlobalAlias *SrcA);
+    
+    void linkAppendingVarInit(const AppendingVarInfo &AVI);
+    void linkGlobalInits();
+    void linkFunctionBody(Function *Dst, Function *Src);
+    void linkAliasBodies();
+    void linkNamedMDNodes();
+  };
 }
 
-/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict
+
+
+/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
 /// in the symbol table.  This is good for all clients except for us.  Go
 /// through the trouble to force this back.
-static void ForceRenaming(GlobalValue *GV, const std::string &Name) {
-  assert(GV->getName() != Name && "Can't force rename to self");
-  ValueSymbolTable &ST = GV->getParent()->getValueSymbolTable();
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+  // If the global doesn't force its name or if it already has the right name,
+  // there is nothing for us to do.
+  if (GV->hasLocalLinkage() || GV->getName() == Name)
+    return;
+
+  Module *M = GV->getParent();
 
   // If there is a conflict, rename the conflict.
-  if (GlobalValue *ConflictGV = cast_or_null<GlobalValue>(ST.lookup(Name))) {
-    assert(ConflictGV->hasLocalLinkage() &&
-           "Not conflicting with a static global, should link instead!");
+  if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
     GV->takeName(ConflictGV);
     ConflictGV->setName(Name);    // This will cause ConflictGV to get renamed
-    assert(ConflictGV->getName() != Name && "ForceRenaming didn't work");
+    assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
   } else {
     GV->setName(Name);              // Force the name back
   }
@@ -352,30 +419,33 @@ static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
   unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
   DestGV->copyAttributesFrom(SrcGV);
   DestGV->setAlignment(Alignment);
+  
+  forceRenaming(DestGV, SrcGV->getName());
 }
 
-/// GetLinkageResult - This analyzes the two global values and determines what
+/// getLinkageResult - This analyzes the two global values and determines what
 /// the result will look like in the destination module.  In particular, it
 /// computes the resultant linkage type, computes whether the global in the
 /// source should be copied over to the destination (replacing the existing
 /// one), and computes whether this linkage is an error or not. It also performs
 /// visibility checks: we cannot link together two symbols with different
 /// visibilities.
-static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
-                             GlobalValue::LinkageTypes &LT, bool &LinkFromSrc,
-                             std::string *Err) {
-  assert((!Dest || !Src->hasLocalLinkage()) &&
+bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+                                    GlobalValue::LinkageTypes &LT, 
+                                    bool &LinkFromSrc) {
+  assert(Dest && "Must have two globals being queried");
+  assert(!Src->hasLocalLinkage() &&
          "If Src has internal linkage, Dest shouldn't be set!");
-  if (!Dest) {
-    // Linking something to nothing.
-    LinkFromSrc = true;
-    LT = Src->getLinkage();
-  } else if (Src->isDeclaration()) {
+  
+  bool SrcIsDeclaration = Src->isDeclaration();
+  bool DestIsDeclaration = Dest->isDeclaration();
+  
+  if (SrcIsDeclaration) {
     // If Src is external or if both Src & Dest are external..  Just link the
     // external globals, we aren't adding anything.
     if (Src->hasDLLImportLinkage()) {
       // If one of GVs has DLLImport linkage, result should be dllimport'ed.
-      if (Dest->isDeclaration()) {
+      if (DestIsDeclaration) {
         LinkFromSrc = true;
         LT = Src->getLinkage();
       }
@@ -387,16 +457,10 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
       LinkFromSrc = false;
       LT = Dest->getLinkage();
     }
-  } else if (Dest->isDeclaration() && !Dest->hasDLLImportLinkage()) {
+  } else if (DestIsDeclaration && !Dest->hasDLLImportLinkage()) {
     // If Dest is external but Src is not:
     LinkFromSrc = true;
     LT = Src->getLinkage();
-  } else if (Src->hasAppendingLinkage() || Dest->hasAppendingLinkage()) {
-    if (Src->getLinkage() != Dest->getLinkage())
-      return Error(Err, "Linking globals named '" + Src->getName() +
-            "': can only link appending global with another appending global!");
-    LinkFromSrc = true; // Special cased.
-    LT = Src->getLinkage();
   } else if (Src->isWeakForLinker()) {
     // At this point we know that Dest has LinkOnce, External*, Weak, Common,
     // or DLL* linkage.
@@ -420,883 +484,485 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
       LT = GlobalValue::ExternalLinkage;
     }
   } else {
-    assert((Dest->hasExternalLinkage() ||
-            Dest->hasDLLImportLinkage() ||
-            Dest->hasDLLExportLinkage() ||
-            Dest->hasExternalWeakLinkage()) &&
-           (Src->hasExternalLinkage() ||
-            Src->hasDLLImportLinkage() ||
-            Src->hasDLLExportLinkage() ||
-            Src->hasExternalWeakLinkage()) &&
+    assert((Dest->hasExternalLinkage()  || Dest->hasDLLImportLinkage() ||
+            Dest->hasDLLExportLinkage() || Dest->hasExternalWeakLinkage()) &&
+           (Src->hasExternalLinkage()   || Src->hasDLLImportLinkage() ||
+            Src->hasDLLExportLinkage()  || Src->hasExternalWeakLinkage()) &&
            "Unexpected linkage type!");
-    return Error(Err, "Linking globals named '" + Src->getName() +
+    return emitError("Linking globals named '" + Src->getName() +
                  "': symbol multiply defined!");
   }
 
   // Check visibility
-  if (Dest && Src->getVisibility() != Dest->getVisibility() &&
-      !Src->isDeclaration() && !Dest->isDeclaration() &&
+  if (Src->getVisibility() != Dest->getVisibility() &&
+      !SrcIsDeclaration && !DestIsDeclaration &&
       !Src->hasAvailableExternallyLinkage() &&
       !Dest->hasAvailableExternallyLinkage())
-      return Error(Err, "Linking globals named '" + Src->getName() +
+    return emitError("Linking globals named '" + Src->getName() +
                    "': symbols have different visibilities!");
   return false;
 }
 
-// Insert all of the named mdnoes in Src into the Dest module.
-static void LinkNamedMDNodes(Module *Dest, Module *Src,
-                             ValueToValueMapTy &ValueMap) {
-  for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
-         E = Src->named_metadata_end(); I != E; ++I) {
-    const NamedMDNode *SrcNMD = I;
-    NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
-    // Add Src elements into Dest node.
-    for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
-      DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
-                                                ValueMap)));
+/// computeTypeMapping - Loop over all of the linked values to compute type
+/// mappings.  For example, if we link "extern Foo *x" and "Foo *x = NULL", then
+/// we have two struct types 'Foo' but one got renamed when the module was
+/// loaded into the same LLVMContext.
+void ModuleLinker::computeTypeMapping() {
+  // Incorporate globals.
+  for (Module::global_iterator I = SrcM->global_begin(),
+       E = SrcM->global_end(); I != E; ++I) {
+    GlobalValue *DGV = getLinkedToGlobal(I);
+    if (DGV == 0) continue;  // No dest global to link against.
+    
+    if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
+      TypeMap.addTypeMapping(DGV->getType(), I->getType());
+      continue;      
+    }
+    
+    // Both are appending arrays: map element types only (lengths may differ).
+    ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+    ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
+    TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+  }
+  
+  // Incorporate functions.
+  for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
+    if (GlobalValue *DGV = getLinkedToGlobal(I))
+      TypeMap.addTypeMapping(DGV->getType(), I->getType());
   }
+  
+  // Don't bother incorporating aliases, they aren't generally typed well.
+  
+  // Now that we have discovered all of the type equivalences, get a body for
+  // any 'opaque' types in the dest module that are now resolved. 
+  TypeMap.linkDefinedTypeBodies();
 }
 
-// LinkGlobals - Loop through the global variables in the src module and merge
-// them into the dest module.
-static bool LinkGlobals(Module *Dest, const Module *Src,
-                        ValueToValueMapTy &ValueMap,
-                    std::multimap<std::string, GlobalVariable *> &AppendingVars,
-                        std::string *Err) {
-  ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
-
-  // Loop over all of the globals in the src module, mapping them over as we go
-  for (Module::const_global_iterator I = Src->global_begin(),
-       E = Src->global_end(); I != E; ++I) {
-    const GlobalVariable *SGV = I;
-    GlobalValue *DGV = 0;
-
-    // Check to see if may have to link the global with the global, alias or
-    // function.
-    if (SGV->hasName() && !SGV->hasLocalLinkage())
-      DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SGV->getName()));
-
-    // If we found a global with the same name in the dest module, but it has
-    // internal linkage, we are really not doing any linkage here.
-    if (DGV && DGV->hasLocalLinkage())
-      DGV = 0;
-
-    // If types don't agree due to opaque types, try to resolve them.
-    if (DGV && DGV->getType() != SGV->getType())
-      RecursiveResolveTypes(SGV->getType(), DGV->getType());
-
-    assert((SGV->hasInitializer() || SGV->hasExternalWeakLinkage() ||
-            SGV->hasExternalLinkage() || SGV->hasDLLImportLinkage()) &&
-           "Global must either be external or have an initializer!");
+/// linkAppendingVarProto - Replace DstGV with a new appending global sized for
+/// both arrays and queue the initializers.  Return true on error.
+bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+                                         GlobalVariable *SrcGV) {
+ 
+  if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+    return emitError("Linking globals named '" + SrcGV->getName() +
+           "': can only link appending global with another appending global!");
+  
+  ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+  ArrayType *SrcTy =
+    cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+  Type *EltTy = DstTy->getElementType();
+  
+  // Check to see that the two arrays agree on type.
+  if (EltTy != SrcTy->getElementType())
+    return emitError("Appending variables with different element types!");
+  if (DstGV->isConstant() != SrcGV->isConstant())
+    return emitError("Appending variables linked with different const'ness!");
+  
+  if (DstGV->getAlignment() != SrcGV->getAlignment())
+    return emitError(
+             "Appending variables with different alignment need to be linked!");
+  
+  if (DstGV->getVisibility() != SrcGV->getVisibility())
+    return emitError(
+            "Appending variables with different visibility need to be linked!");
+  
+  if (DstGV->getSection() != SrcGV->getSection())
+    return emitError(
+          "Appending variables with different section name need to be linked!");
+  
+  uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
+  ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+  
+  // Create the new global variable.
+  GlobalVariable *NG =
+    new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
+                       DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV,
+                       DstGV->isThreadLocal(),
+                       DstGV->getType()->getAddressSpace());
+  
+  // Propagate alignment, visibility and section info.
+  CopyGVAttributes(NG, DstGV);
+  // Remember both old initializers; NG itself was created without one.
+  AppendingVarInfo AVI;
+  AVI.NewGV = NG;
+  AVI.DstInit = DstGV->getInitializer();
+  AVI.SrcInit = SrcGV->getInitializer();
+  AppendingVars.push_back(AVI);
+
+  // Replace any uses of the two global variables with uses of the new
+  // global.
+  ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+  DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+  DstGV->eraseFromParent();
+  
+  // Zap the initializer in the source variable so we don't try to link it.
+  SrcGV->setInitializer(0);
+  SrcGV->setLinkage(GlobalValue::ExternalLinkage);
+  return false;
+}
 
+/// linkGlobalProto - Loop through the global variables in the src module and
+/// merge them into the dest module.
+bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
+  GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+  if (DGV) {
+    // Concatenation of appending linkage variables is magic and handled later.
+    if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
+      return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
+    
+    // Determine whether linkage of these two globals follows the source
+    // module's definition or the destination module's definition.
     GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
     bool LinkFromSrc = false;
-    if (GetLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc, Err))
+    if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc))
       return true;
 
-    if (DGV == 0) {
-      // No linking to be performed, simply create an identical version of the
-      // symbol over in the dest module... the initializer will be filled in
-      // later by LinkGlobalInits.
-      GlobalVariable *NewDGV =
-        new GlobalVariable(*Dest, SGV->getType()->getElementType(),
-                           SGV->isConstant(), SGV->getLinkage(), /*init*/0,
-                           SGV->getName(), 0, false,
-                           SGV->getType()->getAddressSpace());
-      // Propagate alignment, visibility and section info.
-      CopyGVAttributes(NewDGV, SGV);
-      NewDGV->setUnnamedAddr(SGV->hasUnnamedAddr());
-
-      // If the LLVM runtime renamed the global, but it is an externally visible
-      // symbol, DGV must be an existing global with internal linkage.  Rename
-      // it.
-      if (!NewDGV->hasLocalLinkage() && NewDGV->getName() != SGV->getName())
-        ForceRenaming(NewDGV, SGV->getName());
-
-      // Make sure to remember this mapping.
-      ValueMap[SGV] = NewDGV;
-
-      // Keep track that this is an appending variable.
-      if (SGV->hasAppendingLinkage())
-        AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV));
-      continue;
-    }
-
-    bool HasUnnamedAddr = SGV->hasUnnamedAddr() && DGV->hasUnnamedAddr();
-
-    // If the visibilities of the symbols disagree and the destination is a
-    // prototype, take the visibility of its input.
-    if (DGV->isDeclaration())
-      DGV->setVisibility(SGV->getVisibility());
-
-    if (DGV->hasAppendingLinkage()) {
-      // No linking is performed yet.  Just insert a new copy of the global, and
-      // keep track of the fact that it is an appending variable in the
-      // AppendingVars map.  The name is cleared out so that no linkage is
-      // performed.
-      GlobalVariable *NewDGV =
-        new GlobalVariable(*Dest, SGV->getType()->getElementType(),
-                           SGV->isConstant(), SGV->getLinkage(), /*init*/0,
-                           "", 0, false,
-                           SGV->getType()->getAddressSpace());
-
-      // Set alignment allowing CopyGVAttributes merge it with alignment of SGV.
-      NewDGV->setAlignment(DGV->getAlignment());
-      // Propagate alignment, section and visibility info.
-      CopyGVAttributes(NewDGV, SGV);
-
-      // Make sure to remember this mapping...
-      ValueMap[SGV] = NewDGV;
-
-      // Keep track that this is an appending variable...
-      AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV));
-      continue;
-    }
-
-    if (LinkFromSrc) {
-      if (isa<GlobalAlias>(DGV))
-        return Error(Err, "Global-Alias Collision on '" + SGV->getName() +
-                     "': symbol multiple defined");
-
-      // If the types don't match, and if we are to link from the source, nuke
-      // DGV and create a new one of the appropriate type.  Note that the thing
-      // we are replacing may be a function (if a prototype, weak, etc) or a
-      // global variable.
-      GlobalVariable *NewDGV =
-        new GlobalVariable(*Dest, SGV->getType()->getElementType(),
-                           SGV->isConstant(), NewLinkage, /*init*/0,
-                           DGV->getName(), 0, false,
-                           SGV->getType()->getAddressSpace());
-
-      // Set the unnamed_addr.
-      NewDGV->setUnnamedAddr(HasUnnamedAddr);
-
-      // Propagate alignment, section, and visibility info.
-      CopyGVAttributes(NewDGV, SGV);
-      DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV,
-                                                              DGV->getType()));
-
-      // DGV will conflict with NewDGV because they both had the same
-      // name. We must erase this now so ForceRenaming doesn't assert
-      // because DGV might not have internal linkage.
-      if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV))
-        Var->eraseFromParent();
-      else
-        cast<Function>(DGV)->eraseFromParent();
-
-      // If the symbol table renamed the global, but it is an externally visible
-      // symbol, DGV must be an existing global with internal linkage.  Rename.
-      if (NewDGV->getName() != SGV->getName() && !NewDGV->hasLocalLinkage())
-        ForceRenaming(NewDGV, SGV->getName());
-
-      // Inherit const as appropriate.
-      NewDGV->setConstant(SGV->isConstant());
-
+    // If we're not linking from the source, then keep the definition that we
+    // have.
+    if (!LinkFromSrc) {
+      // Special case for const propagation.
+      if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+        if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
+          DGVar->setConstant(true);
+      
+      // Set calculated linkage.
+      DGV->setLinkage(NewLinkage);
+      
       // Make sure to remember this mapping.
-      ValueMap[SGV] = NewDGV;
-      continue;
+      ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
+      
+      // Destroy the source global's initializer (and convert it to a prototype)
+      // so that we don't attempt to copy it over when processing global
+      // initializers.
+      SGV->setInitializer(0);
+      SGV->setLinkage(GlobalValue::ExternalLinkage);
+      return false;
     }
-
-    // Not "link from source", keep the one in the DestModule and remap the
-    // input onto it.
-
-    // Special case for const propagation.
-    if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
-      if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
-        DGVar->setConstant(true);
-
-    // SGV is global, but DGV is alias.
-    if (isa<GlobalAlias>(DGV)) {
-      // The only valid mappings are:
-      // - SGV is external declaration, which is effectively a no-op.
-      // - SGV is weak, when we just need to throw SGV out.
-      if (!SGV->isDeclaration() && !SGV->isWeakForLinker())
-        return Error(Err, "Global-Alias Collision on '" + SGV->getName() +
-                     "': symbol multiple defined");
-    }
-
-    // Set calculated linkage and unnamed_addr
-    DGV->setLinkage(NewLinkage);
-    DGV->setUnnamedAddr(HasUnnamedAddr);
-
-    // Make sure to remember this mapping...
-    ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType());
   }
-  return false;
-}
-
-static GlobalValue::LinkageTypes
-CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
-  GlobalValue::LinkageTypes SL = SGV->getLinkage();
-  GlobalValue::LinkageTypes DL = DGV->getLinkage();
-  if (SL == GlobalValue::ExternalLinkage || DL == GlobalValue::ExternalLinkage)
-    return GlobalValue::ExternalLinkage;
-  else if (SL == GlobalValue::WeakAnyLinkage ||
-           DL == GlobalValue::WeakAnyLinkage)
-    return GlobalValue::WeakAnyLinkage;
-  else if (SL == GlobalValue::WeakODRLinkage ||
-           DL == GlobalValue::WeakODRLinkage)
-    return GlobalValue::WeakODRLinkage;
-  else if (SL == GlobalValue::InternalLinkage &&
-           DL == GlobalValue::InternalLinkage)
-    return GlobalValue::InternalLinkage;
-  else if (SL == GlobalValue::LinkerPrivateLinkage &&
-           DL == GlobalValue::LinkerPrivateLinkage)
-    return GlobalValue::LinkerPrivateLinkage;
-  else if (SL == GlobalValue::LinkerPrivateWeakLinkage &&
-           DL == GlobalValue::LinkerPrivateWeakLinkage)
-    return GlobalValue::LinkerPrivateWeakLinkage;
-  else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage &&
-           DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage)
-    return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
-  else {
-    assert (SL == GlobalValue::PrivateLinkage &&
-            DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
-    return GlobalValue::PrivateLinkage;
+  
+  // No linking to be performed or linking from the source: simply create an
+  // identical version of the symbol over in the dest module... the
+  // initializer will be filled in later by linkGlobalInits.
+  GlobalVariable *NewDGV =
+    new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
+                       SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+                       SGV->getName(), /*insertbefore*/0,
+                       SGV->isThreadLocal(),
+                       SGV->getType()->getAddressSpace());
+  // Propagate alignment, visibility and section info.
+  CopyGVAttributes(NewDGV, SGV);
+
+  if (DGV) {
+    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+    DGV->eraseFromParent();
   }
-}
-
-// LinkAlias - Loop through the alias in the src module and link them into the
-// dest module. We're assuming, that all functions/global variables were already
-// linked in.
-static bool LinkAlias(Module *Dest, const Module *Src,
-                      ValueToValueMapTy &ValueMap,
-                      std::string *Err) {
-  // Loop over all alias in the src module
-  for (Module::const_alias_iterator I = Src->alias_begin(),
-         E = Src->alias_end(); I != E; ++I) {
-    const GlobalAlias *SGA = I;
-    const GlobalValue *SAliasee = SGA->getAliasedGlobal();
-    GlobalAlias *NewGA = NULL;
-
-    // Globals were already linked, thus we can just query ValueMap for variant
-    // of SAliasee in Dest.
-    ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee);
-    assert(VMI != ValueMap.end() && "Aliasee not linked");
-    GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
-    GlobalValue* DGV = NULL;
-
-    // Fixup aliases to bitcasts.  Note that aliases to GEPs are still broken
-    // by this, but aliases to GEPs are broken to a lot of other things, so
-    // it's less important.
-    Constant *DAliaseeConst = DAliasee;
-    if (SGA->getType() != DAliasee->getType())
-      DAliaseeConst = ConstantExpr::getBitCast(DAliasee, SGA->getType());
-
-    // Try to find something 'similar' to SGA in destination module.
-    if (!DGV && !SGA->hasLocalLinkage()) {
-      DGV = Dest->getNamedAlias(SGA->getName());
-
-      // If types don't agree due to opaque types, try to resolve them.
-      if (DGV && DGV->getType() != SGA->getType())
-        RecursiveResolveTypes(SGA->getType(), DGV->getType());
-    }
-
-    if (!DGV && !SGA->hasLocalLinkage()) {
-      DGV = Dest->getGlobalVariable(SGA->getName());
-
-      // If types don't agree due to opaque types, try to resolve them.
-      if (DGV && DGV->getType() != SGA->getType())
-        RecursiveResolveTypes(SGA->getType(), DGV->getType());
-    }
-
-    if (!DGV && !SGA->hasLocalLinkage()) {
-      DGV = Dest->getFunction(SGA->getName());
-
-      // If types don't agree due to opaque types, try to resolve them.
-      if (DGV && DGV->getType() != SGA->getType())
-        RecursiveResolveTypes(SGA->getType(), DGV->getType());
-    }
-
-    // No linking to be performed on internal stuff.
-    if (DGV && DGV->hasLocalLinkage())
-      DGV = NULL;
-
-    if (GlobalAlias *DGA = dyn_cast_or_null<GlobalAlias>(DGV)) {
-      // Types are known to be the same, check whether aliasees equal. As
-      // globals are already linked we just need query ValueMap to find the
-      // mapping.
-      if (DAliasee == DGA->getAliasedGlobal()) {
-        // This is just two copies of the same alias. Propagate linkage, if
-        // necessary.
-        DGA->setLinkage(CalculateAliasLinkage(SGA, DGA));
-
-        NewGA = DGA;
-        // Proceed to 'common' steps
-      } else
-        return Error(Err, "Alias Collision on '"  + SGA->getName()+
-                     "': aliases have different aliasees");
-    } else if (GlobalVariable *DGVar = dyn_cast_or_null<GlobalVariable>(DGV)) {
-      // The only allowed way is to link alias with external declaration or weak
-      // symbol..
-      if (DGVar->isDeclaration() || DGVar->isWeakForLinker()) {
-        // But only if aliasee is global too...
-        if (!isa<GlobalVariable>(DAliasee))
-          return Error(Err, "Global-Alias Collision on '" + SGA->getName() +
-                       "': aliasee is not global variable");
-
-        NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
-                                SGA->getName(), DAliaseeConst, Dest);
-        CopyGVAttributes(NewGA, SGA);
-
-        // Any uses of DGV need to change to NewGA, with cast, if needed.
-        if (SGA->getType() != DGVar->getType())
-          DGVar->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA,
-                                                             DGVar->getType()));
-        else
-          DGVar->replaceAllUsesWith(NewGA);
-
-        // DGVar will conflict with NewGA because they both had the same
-        // name. We must erase this now so ForceRenaming doesn't assert
-        // because DGV might not have internal linkage.
-        DGVar->eraseFromParent();
-
-        // Proceed to 'common' steps
-      } else
-        return Error(Err, "Global-Alias Collision on '" + SGA->getName() +
-                     "': symbol multiple defined");
-    } else if (Function *DF = dyn_cast_or_null<Function>(DGV)) {
-      // The only allowed way is to link alias with external declaration or weak
-      // symbol...
-      if (DF->isDeclaration() || DF->isWeakForLinker()) {
-        // But only if aliasee is function too...
-        if (!isa<Function>(DAliasee))
-          return Error(Err, "Function-Alias Collision on '" + SGA->getName() +
-                       "': aliasee is not function");
-
-        NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
-                                SGA->getName(), DAliaseeConst, Dest);
-        CopyGVAttributes(NewGA, SGA);
-
-        // Any uses of DF need to change to NewGA, with cast, if needed.
-        if (SGA->getType() != DF->getType())
-          DF->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA,
-                                                          DF->getType()));
-        else
-          DF->replaceAllUsesWith(NewGA);
-
-        // DF will conflict with NewGA because they both had the same
-        // name. We must erase this now so ForceRenaming doesn't assert
-        // because DF might not have internal linkage.
-        DF->eraseFromParent();
-
-        // Proceed to 'common' steps
-      } else
-        return Error(Err, "Function-Alias Collision on '" + SGA->getName() +
-                     "': symbol multiple defined");
-    } else {
-      // No linking to be performed, simply create an identical version of the
-      // alias over in the dest module...
-      NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
-                              SGA->getName(), DAliaseeConst, Dest);
-      CopyGVAttributes(NewGA, SGA);
-
-      // Proceed to 'common' steps
-    }
-
-    assert(NewGA && "No alias was created in destination module!");
-
-    // If the symbol table renamed the alias, but it is an externally visible
-    // symbol, DGA must be an global value with internal linkage. Rename it.
-    if (NewGA->getName() != SGA->getName() &&
-        !NewGA->hasLocalLinkage())
-      ForceRenaming(NewGA, SGA->getName());
-
-    // Remember this mapping so uses in the source module get remapped
-    // later by MapValue.
-    ValueMap[SGA] = NewGA;
-  }
-
+  
+  // Make sure to remember this mapping.
+  ValueMap[SGV] = NewDGV;
   return false;
 }
 
+/// linkFunctionProto - Link the function in the source module into the
+/// destination module if needed, setting up mapping information.
+bool ModuleLinker::linkFunctionProto(Function *SF) {
+  GlobalValue *DGV = getLinkedToGlobal(SF);
 
-// LinkGlobalInits - Update the initializers in the Dest module now that all
-// globals that may be referenced are in Dest.
-static bool LinkGlobalInits(Module *Dest, const Module *Src,
-                            ValueToValueMapTy &ValueMap,
-                            std::string *Err) {
-  // Loop over all of the globals in the src module, mapping them over as we go
-  for (Module::const_global_iterator I = Src->global_begin(),
-       E = Src->global_end(); I != E; ++I) {
-    const GlobalVariable *SGV = I;
-
-    if (SGV->hasInitializer()) {      // Only process initialized GV's
-      // Figure out what the initializer looks like in the dest module.
-      Constant *SInit =
-        cast<Constant>(MapValue(SGV->getInitializer(), ValueMap));
-      // Grab destination global variable or alias.
-      GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
-
-      // If dest if global variable, check that initializers match.
-      if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) {
-        if (DGVar->hasInitializer()) {
-          if (SGV->hasExternalLinkage()) {
-            if (DGVar->getInitializer() != SInit)
-              return Error(Err, "Global Variable Collision on '" +
-                           SGV->getName() +
-                           "': global variables have different initializers");
-          } else if (DGVar->isWeakForLinker()) {
-            // Nothing is required, mapped values will take the new global
-            // automatically.
-          } else if (SGV->isWeakForLinker()) {
-            // Nothing is required, mapped values will take the new global
-            // automatically.
-          } else if (DGVar->hasAppendingLinkage()) {
-            llvm_unreachable("Appending linkage unimplemented!");
-          } else {
-            llvm_unreachable("Unknown linkage!");
-          }
-        } else {
-          // Copy the initializer over now...
-          DGVar->setInitializer(SInit);
-        }
-      } else {
-        // Destination is alias, the only valid situation is when source is
-        // weak. Also, note, that we already checked linkage in LinkGlobals(),
-        // thus we assert here.
-        // FIXME: Should we weaken this assumption, 'dereference' alias and
-        // check for initializer of aliasee?
-        assert(SGV->isWeakForLinker());
-      }
+  if (DGV) {
+    GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+    bool LinkFromSrc = false;
+    if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+      return true;
+    
+    if (!LinkFromSrc) {
+      // Set calculated linkage
+      DGV->setLinkage(NewLinkage);
+      
+      // Make sure to remember this mapping.
+      ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+      
+      // Remove the body from the source module so we don't attempt to remap it.
+      SF->deleteBody();
+      return false;
     }
   }
+  
+  // If there is no linkage to be performed or we are linking from the source,
+  // bring SF over.
+  Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
+                                     SF->getLinkage(), SF->getName(), DstM);
+  CopyGVAttributes(NewDF, SF);
+
+  if (DGV) {
+    // Any uses of DGV need to change to NewDF, with cast.
+    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+    DGV->eraseFromParent();
+  }
+  
+  ValueMap[SF] = NewDF;
   return false;
 }
 
-// LinkFunctionProtos - Link the functions together between the two modules,
-// without doing function bodies... this just adds external function prototypes
-// to the Dest function...
-//
-static bool LinkFunctionProtos(Module *Dest, const Module *Src,
-                               ValueToValueMapTy &ValueMap,
-                               std::string *Err) {
-  ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
-
-  // Loop over all of the functions in the src module, mapping them over
-  for (Module::const_iterator I = Src->begin(), E = Src->end(); I != E; ++I) {
-    const Function *SF = I;   // SrcFunction
-    GlobalValue *DGV = 0;
-
-    // Check to see if may have to link the function with the global, alias or
-    // function.
-    if (SF->hasName() && !SF->hasLocalLinkage())
-      DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SF->getName()));
-
-    // If we found a global with the same name in the dest module, but it has
-    // internal linkage, we are really not doing any linkage here.
-    if (DGV && DGV->hasLocalLinkage())
-      DGV = 0;
-
-    // If types don't agree due to opaque types, try to resolve them.
-    if (DGV && DGV->getType() != SF->getType())
-      RecursiveResolveTypes(SF->getType(), DGV->getType());
-
+/// linkAliasProto - Set up prototypes for any aliases that come over from the
+/// source module.
+bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
+  GlobalValue *DGV = getLinkedToGlobal(SGA);
+  
+  if (DGV) {
     GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
     bool LinkFromSrc = false;
-    if (GetLinkageResult(DGV, SF, NewLinkage, LinkFromSrc, Err))
+    if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc))
       return true;
-
-    // If there is no linkage to be performed, just bring over SF without
-    // modifying it.
-    if (DGV == 0) {
-      // Function does not already exist, simply insert an function signature
-      // identical to SF into the dest module.
-      Function *NewDF = Function::Create(SF->getFunctionType(),
-                                         SF->getLinkage(),
-                                         SF->getName(), Dest);
-      CopyGVAttributes(NewDF, SF);
-
-      // If the LLVM runtime renamed the function, but it is an externally
-      // visible symbol, DF must be an existing function with internal linkage.
-      // Rename it.
-      if (!NewDF->hasLocalLinkage() && NewDF->getName() != SF->getName())
-        ForceRenaming(NewDF, SF->getName());
-
-      // ... and remember this mapping...
-      ValueMap[SF] = NewDF;
-      continue;
-    }
-
-    // If the visibilities of the symbols disagree and the destination is a
-    // prototype, take the visibility of its input.
-    if (DGV->isDeclaration())
-      DGV->setVisibility(SF->getVisibility());
-
-    if (LinkFromSrc) {
-      if (isa<GlobalAlias>(DGV))
-        return Error(Err, "Function-Alias Collision on '" + SF->getName() +
-                     "': symbol multiple defined");
-
-      // We have a definition of the same name but different type in the
-      // source module. Copy the prototype to the destination and replace
-      // uses of the destination's prototype with the new prototype.
-      Function *NewDF = Function::Create(SF->getFunctionType(), NewLinkage,
-                                         SF->getName(), Dest);
-      CopyGVAttributes(NewDF, SF);
-
-      // Any uses of DF need to change to NewDF, with cast
-      DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF,
-                                                              DGV->getType()));
-
-      // DF will conflict with NewDF because they both had the same. We must
-      // erase this now so ForceRenaming doesn't assert because DF might
-      // not have internal linkage.
-      if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV))
-        Var->eraseFromParent();
-      else
-        cast<Function>(DGV)->eraseFromParent();
-
-      // If the symbol table renamed the function, but it is an externally
-      // visible symbol, DF must be an existing function with internal
-      // linkage.  Rename it.
-      if (NewDF->getName() != SF->getName() && !NewDF->hasLocalLinkage())
-        ForceRenaming(NewDF, SF->getName());
-
-      // Remember this mapping so uses in the source module get remapped
-      // later by MapValue.
-      ValueMap[SF] = NewDF;
-      continue;
+    
+    if (!LinkFromSrc) {
+      // Set calculated linkage.
+      DGV->setLinkage(NewLinkage);
+      
+      // Make sure to remember this mapping.
+      ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
+      
+      // Clear the aliasee in the source module so we don't attempt to remap it.
+      SGA->setAliasee(0);
+      return false;
     }
+  }
+  
+  // If there is no linkage to be performed or we're linking from the source,
+  // bring over SGA.
+  GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
+                                       SGA->getLinkage(), SGA->getName(),
+                                       /*aliasee*/0, DstM);
+  CopyGVAttributes(NewDA, SGA);
+
+  if (DGV) {
+    // Any uses of DGV need to change to NewDA, with cast.
+    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+    DGV->eraseFromParent();
+  }
+  
+  ValueMap[SGA] = NewDA;
+  return false;
+}
 
-    // Not "link from source", keep the one in the DestModule and remap the
-    // input onto it.
-
-    if (isa<GlobalAlias>(DGV)) {
-      // The only valid mappings are:
-      // - SF is external declaration, which is effectively a no-op.
-      // - SF is weak, when we just need to throw SF out.
-      if (!SF->isDeclaration() && !SF->isWeakForLinker())
-        return Error(Err, "Function-Alias Collision on '" + SF->getName() +
-                     "': symbol multiple defined");
-    }
+void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
+  // Merge the initializer.
+  SmallVector<Constant*, 16> Elements;
+  if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      Elements.push_back(I->getOperand(i));
+  } else {
+    assert(isa<ConstantAggregateZero>(AVI.DstInit));
+    ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType());
+    Type *EltTy = DstAT->getElementType();
+    Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy));
+  }
+  
+  Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
+  if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      Elements.push_back(I->getOperand(i));
+  } else {
+    assert(isa<ConstantAggregateZero>(SrcInit));
+    ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType());
+    Type *EltTy = SrcAT->getElementType();
+    Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy));
+  }
+  ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
+  AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
+}
 
-    // Set calculated linkage
-    DGV->setLinkage(NewLinkage);
 
-    // Make sure to remember this mapping.
-    ValueMap[SF] = ConstantExpr::getBitCast(DGV, SF->getType());
+// linkGlobalInits - Update the initializers in the Dest module now that all
+// globals that may be referenced are in Dest.
+void ModuleLinker::linkGlobalInits() {
+  // Loop over all of the globals in the src module, mapping them over as we go
+  for (Module::const_global_iterator I = SrcM->global_begin(),
+       E = SrcM->global_end(); I != E; ++I) {
+    if (!I->hasInitializer()) continue;      // Only process initialized GV's.
+    
+    // Grab destination global variable.
+    GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
+    // Figure out what the initializer looks like in the dest module.
+    DGV->setInitializer(MapValue(I->getInitializer(), ValueMap,
+                                 RF_None, &TypeMap));
   }
-  return false;
 }
 
-// LinkFunctionBody - Copy the source function over into the dest function and
+// linkFunctionBody - Copy the source function over into the dest function and
 // fix up references to values.  At this point we know that Dest is an external
 // function, and that Src is not.
-static bool LinkFunctionBody(Function *Dest, Function *Src,
-                             ValueToValueMapTy &ValueMap,
-                             std::string *Err) {
-  assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration());
+void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
+  assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
 
   // Go through and convert function arguments over, remembering the mapping.
-  Function::arg_iterator DI = Dest->arg_begin();
+  Function::arg_iterator DI = Dst->arg_begin();
   for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
        I != E; ++I, ++DI) {
-    DI->setName(I->getName());  // Copy the name information over...
+    DI->setName(I->getName());  // Copy the name over.
 
-    // Add a mapping to our local map
+    // Add a mapping to our value map.
     ValueMap[I] = DI;
   }
 
   // Splice the body of the source function into the dest function.
-  Dest->getBasicBlockList().splice(Dest->end(), Src->getBasicBlockList());
+  Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
 
   // At this point, all of the instructions and values of the function are now
   // copied over.  The only problem is that they are still referencing values in
   // the Source function as operands.  Loop through all of the operands of the
   // functions and patch them up to point to the local versions.
-  for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
+  for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-      RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries);
+      RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
 
   // There is no need to map the arguments anymore.
   for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
        I != E; ++I)
     ValueMap.erase(I);
-
-  return false;
 }
 
 
-// LinkFunctionBodies - Link in the function bodies that are defined in the
-// source module into the DestModule.  This consists basically of copying the
-// function over and fixing up references to values.
-static bool LinkFunctionBodies(Module *Dest, Module *Src,
-                               ValueToValueMapTy &ValueMap,
-                               std::string *Err) {
-
-  // Loop over all of the functions in the src module, mapping them over as we
-  // go
-  for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) {
-    if (!SF->isDeclaration()) {               // No body if function is external
-      Function *DF = dyn_cast<Function>(ValueMap[SF]); // Destination function
-
-      // DF not external SF external?
-      if (DF && DF->isDeclaration())
-        // Only provide the function body if there isn't one already.
-        if (LinkFunctionBody(DF, SF, ValueMap, Err))
-          return true;
+void ModuleLinker::linkAliasBodies() {
+  for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
+       I != E; ++I)
+    if (Constant *Aliasee = I->getAliasee()) {
+      GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
+      DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
     }
-  }
-  return false;
 }
 
-// LinkAppendingVars - If there were any appending global variables, link them
-// together now.  Return true on error.
-static bool LinkAppendingVars(Module *M,
-                  std::multimap<std::string, GlobalVariable *> &AppendingVars,
-                              std::string *ErrorMsg) {
-  if (AppendingVars.empty()) return false; // Nothing to do.
-
-  // Loop over the multimap of appending vars, processing any variables with the
-  // same name, forming a new appending global variable with both of the
-  // initializers merged together, then rewrite references to the old variables
-  // and delete them.
-  std::vector<Constant*> Inits;
-  while (AppendingVars.size() > 1) {
-    // Get the first two elements in the map...
-    std::multimap<std::string,
-      GlobalVariable*>::iterator Second = AppendingVars.begin(), First=Second++;
-
-    // If the first two elements are for different names, there is no pair...
-    // Otherwise there is a pair, so link them together...
-    if (First->first == Second->first) {
-      GlobalVariable *G1 = First->second, *G2 = Second->second;
-      const ArrayType *T1 = cast<ArrayType>(G1->getType()->getElementType());
-      const ArrayType *T2 = cast<ArrayType>(G2->getType()->getElementType());
-
-      // Check to see that they two arrays agree on type...
-      if (T1->getElementType() != T2->getElementType())
-        return Error(ErrorMsg,
-         "Appending variables with different element types need to be linked!");
-      if (G1->isConstant() != G2->isConstant())
-        return Error(ErrorMsg,
-                     "Appending variables linked with different const'ness!");
-
-      if (G1->getAlignment() != G2->getAlignment())
-        return Error(ErrorMsg,
-         "Appending variables with different alignment need to be linked!");
-
-      if (G1->getVisibility() != G2->getVisibility())
-        return Error(ErrorMsg,
-         "Appending variables with different visibility need to be linked!");
-
-      if (G1->getSection() != G2->getSection())
-        return Error(ErrorMsg,
-         "Appending variables with different section name need to be linked!");
-
-      unsigned NewSize = T1->getNumElements() + T2->getNumElements();
-      ArrayType *NewType = ArrayType::get(T1->getElementType(),
-                                                         NewSize);
-
-      G1->setName("");   // Clear G1's name in case of a conflict!
-
-      // Create the new global variable...
-      GlobalVariable *NG =
-        new GlobalVariable(*M, NewType, G1->isConstant(), G1->getLinkage(),
-                           /*init*/0, First->first, 0, G1->isThreadLocal(),
-                           G1->getType()->getAddressSpace());
-
-      // Propagate alignment, visibility and section info.
-      CopyGVAttributes(NG, G1);
-
-      // Merge the initializer...
-      Inits.reserve(NewSize);
-      if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) {
-        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
-          Inits.push_back(I->getOperand(i));
-      } else {
-        assert(isa<ConstantAggregateZero>(G1->getInitializer()));
-        Constant *CV = Constant::getNullValue(T1->getElementType());
-        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
-          Inits.push_back(CV);
-      }
-      if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) {
-        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
-          Inits.push_back(I->getOperand(i));
-      } else {
-        assert(isa<ConstantAggregateZero>(G2->getInitializer()));
-        Constant *CV = Constant::getNullValue(T2->getElementType());
-        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
-          Inits.push_back(CV);
-      }
-      NG->setInitializer(ConstantArray::get(NewType, Inits));
-      Inits.clear();
-
-      // Replace any uses of the two global variables with uses of the new
-      // global...
-
-      // FIXME: This should rewrite simple/straight-forward uses such as
-      // getelementptr instructions to not use the Cast!
-      G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
-                             G1->getType()));
-      G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
-                             G2->getType()));
-
-      // Remove the two globals from the module now...
-      M->getGlobalList().erase(G1);
-      M->getGlobalList().erase(G2);
-
-      // Put the new global into the AppendingVars map so that we can handle
-      // linking of more than two vars...
-      Second->second = NG;
-    }
-    AppendingVars.erase(First);
+/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
+/// module.
+void ModuleLinker::linkNamedMDNodes() {
+  for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
+       E = SrcM->named_metadata_end(); I != E; ++I) {
+    NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
+    // Add Src elements into Dest node.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap,
+                                   RF_None, &TypeMap));
   }
-
-  return false;
 }
+  
+bool ModuleLinker::run() {
+  assert(DstM && "Null Destination module");
+  assert(SrcM && "Null Source Module");
 
-static bool ResolveAliases(Module *Dest) {
-  for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end();
-       I != E; ++I)
-    // We can't sue resolveGlobalAlias here because we need to preserve
-    // bitcasts and GEPs.
-    if (const Constant *C = I->getAliasee()) {
-      while (dyn_cast<GlobalAlias>(C))
-        C = cast<GlobalAlias>(C)->getAliasee();
-      const GlobalValue *GV = dyn_cast<GlobalValue>(C);
-      if (C != I && !(GV && GV->isDeclaration()))
-        I->replaceAllUsesWith(const_cast<Constant*>(C));
-    }
-
-  return false;
-}
-
-// LinkModules - This function links two modules together, with the resulting
-// left module modified to be the composite of the two input modules.  If an
-// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
-// the problem.  Upon failure, the Dest module could be in a modified state, and
-// shouldn't be relied on to be consistent.
-bool
-Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
-  assert(Dest != 0 && "Invalid Destination module");
-  assert(Src  != 0 && "Invalid Source Module");
-
-  if (Dest->getDataLayout().empty()) {
-    if (!Src->getDataLayout().empty()) {
-      Dest->setDataLayout(Src->getDataLayout());
-    } else {
-      std::string DataLayout;
-
-      if (Dest->getEndianness() == Module::AnyEndianness) {
-        if (Src->getEndianness() == Module::BigEndian)
-          DataLayout.append("E");
-        else if (Src->getEndianness() == Module::LittleEndian)
-          DataLayout.append("e");
-      }
-
-      if (Dest->getPointerSize() == Module::AnyPointerSize) {
-        if (Src->getPointerSize() == Module::Pointer64)
-          DataLayout.append(DataLayout.length() == 0 ? "p:64:64" : "-p:64:64");
-        else if (Src->getPointerSize() == Module::Pointer32)
-          DataLayout.append(DataLayout.length() == 0 ? "p:32:32" : "-p:32:32");
-      }
-      Dest->setDataLayout(DataLayout);
-    }
-  }
+  // Inherit the target data from the source module if the destination module
+  // doesn't have one already.
+  if (DstM->getDataLayout().empty() && !SrcM->getDataLayout().empty())
+    DstM->setDataLayout(SrcM->getDataLayout());
 
   // Copy the target triple from the source to dest if the dest's is empty.
-  if (Dest->getTargetTriple().empty() && !Src->getTargetTriple().empty())
-    Dest->setTargetTriple(Src->getTargetTriple());
+  if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+    DstM->setTargetTriple(SrcM->getTargetTriple());
 
-  if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() &&
-      Src->getDataLayout() != Dest->getDataLayout())
+  if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() &&
+      SrcM->getDataLayout() != DstM->getDataLayout())
     errs() << "WARNING: Linking two modules of different data layouts!\n";
-  if (!Src->getTargetTriple().empty() &&
-      Dest->getTargetTriple() != Src->getTargetTriple()) {
+  if (!SrcM->getTargetTriple().empty() &&
+      DstM->getTargetTriple() != SrcM->getTargetTriple()) {
     errs() << "WARNING: Linking two modules of different target triples: ";
-    if (!Src->getModuleIdentifier().empty())
-      errs() << Src->getModuleIdentifier() << ": ";
-    errs() << "'" << Src->getTargetTriple() << "' and '" 
-           << Dest->getTargetTriple() << "'\n";
+    if (!SrcM->getModuleIdentifier().empty())
+      errs() << SrcM->getModuleIdentifier() << ": ";
+    errs() << "'" << SrcM->getTargetTriple() << "' and '" 
+           << DstM->getTargetTriple() << "'\n";
   }
 
   // Append the module inline asm string.
-  if (!Src->getModuleInlineAsm().empty()) {
-    if (Dest->getModuleInlineAsm().empty())
-      Dest->setModuleInlineAsm(Src->getModuleInlineAsm());
+  if (!SrcM->getModuleInlineAsm().empty()) {
+    if (DstM->getModuleInlineAsm().empty())
+      DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm());
     else
-      Dest->setModuleInlineAsm(Dest->getModuleInlineAsm()+"\n"+
-                               Src->getModuleInlineAsm());
+      DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+
+                               SrcM->getModuleInlineAsm());
   }
 
   // Update the destination module's dependent libraries list with the libraries
   // from the source module. There's no opportunity for duplicates here as the
   // Module ensures that duplicate insertions are discarded.
-  for (Module::lib_iterator SI = Src->lib_begin(), SE = Src->lib_end();
+  for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end();
        SI != SE; ++SI)
-    Dest->addLibrary(*SI);
+    DstM->addLibrary(*SI);
+  
+  // If the source library's module id is in the dependent library list of the
+  // destination library, remove it since that module is now linked in.
+  StringRef ModuleId = SrcM->getModuleIdentifier();
+  if (!ModuleId.empty())
+    DstM->removeLibrary(sys::path::stem(ModuleId));
 
-  // LinkTypes - Go through the symbol table of the Src module and see if any
-  // types are named in the src module that are not named in the Dst module.
-  // Make sure there are no type name conflicts.
-  if (LinkTypes(Dest, Src, ErrorMsg))
-    return true;
+  
+  // Loop over all of the linked values to compute type mappings.
+  computeTypeMapping();
 
-  // ValueMap - Mapping of values from what they used to be in Src, to what they
-  // are now in Dest.  ValueToValueMapTy is a ValueMap, which involves some
-  // overhead due to the use of Value handles which the Linker doesn't actually
-  // need, but this allows us to reuse the ValueMapper code.
-  ValueToValueMapTy ValueMap;
-
-  // AppendingVars - Keep track of global variables in the destination module
-  // with appending linkage.  After the module is linked together, they are
-  // appended and the module is rewritten.
-  std::multimap<std::string, GlobalVariable *> AppendingVars;
-  for (Module::global_iterator I = Dest->global_begin(), E = Dest->global_end();
-       I != E; ++I) {
-    // Add all of the appending globals already in the Dest module to
-    // AppendingVars.
-    if (I->hasAppendingLinkage())
-      AppendingVars.insert(std::make_pair(I->getName(), I));
-  }
+  // Remap all of the named mdnodes in Src into the DstM module. We do this
+  // after linking GlobalValues so that MDNodes that reference GlobalValues
+  // are properly remapped.
+  linkNamedMDNodes();
 
-  // Insert all of the globals in src into the Dest module... without linking
+  // Insert all of the globals in src into the DstM module... without linking
   // initializers (which could refer to functions not yet mapped over).
-  if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
-    return true;
+  for (Module::global_iterator I = SrcM->global_begin(),
+       E = SrcM->global_end(); I != E; ++I)
+    if (linkGlobalProto(I))
+      return true;
 
   // Link the functions together between the two modules, without doing function
-  // bodies... this just adds external function prototypes to the Dest
+  // bodies... this just adds external function prototypes to the DstM
   // function...  We do this so that when we begin processing function bodies,
   // all of the global values that may be referenced are available in our
   // ValueMap.
-  if (LinkFunctionProtos(Dest, Src, ValueMap, ErrorMsg))
-    return true;
-
-  // If there were any alias, link them now. We really need to do this now,
-  // because all of the aliases that may be referenced need to be available in
-  // ValueMap
-  if (LinkAlias(Dest, Src, ValueMap, ErrorMsg)) return true;
-
-  // Update the initializers in the Dest module now that all globals that may
-  // be referenced are in Dest.
-  if (LinkGlobalInits(Dest, Src, ValueMap, ErrorMsg)) return true;
+  for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
+    if (linkFunctionProto(I))
+      return true;
 
-  // Link in the function bodies that are defined in the source module into the
-  // DestModule.  This consists basically of copying the function over and
-  // fixing up references to values.
-  if (LinkFunctionBodies(Dest, Src, ValueMap, ErrorMsg)) return true;
+  // If there were any aliases, link them now.
+  for (Module::alias_iterator I = SrcM->alias_begin(),
+       E = SrcM->alias_end(); I != E; ++I)
+    if (linkAliasProto(I))
+      return true;
 
-  // If there were any appending global variables, link them together now.
-  if (LinkAppendingVars(Dest, AppendingVars, ErrorMsg)) return true;
+  for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
+    linkAppendingVarInit(AppendingVars[i]);
+  
+  // Update the initializers in the DstM module now that all globals that may
+  // be referenced are in DstM.
+  linkGlobalInits();
+
+  // Link in the function bodies that are defined in the source module into
+  // DstM.
+  for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
+    if (SF->isDeclaration()) continue;      // No body if function is external.
+    
+    linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+  }
 
-  // Resolve all uses of aliases with aliasees
-  if (ResolveAliases(Dest)) return true;
+  // Resolve all uses of aliases with aliasees.
+  linkAliasBodies();
 
-  // Remap all of the named mdnoes in Src into the Dest module. We do this
-  // after linking GlobalValues so that MDNodes that reference GlobalValues
-  // are properly remapped.
-  LinkNamedMDNodes(Dest, Src, ValueMap);
+  // Now that all of the types from the source are used, resolve any structs
+  // copied over to the dest that didn't exist there.
+  TypeMap.linkDefinedTypeBodies();
+  
+  return false;
+}
 
-  // If the source library's module id is in the dependent library list of the
-  // destination library, remove it since that module is now linked in.
-  const std::string &modId = Src->getModuleIdentifier();
-  if (!modId.empty())
-    Dest->removeLibrary(sys::path::stem(modId));
+//===----------------------------------------------------------------------===//
+// LinkModules entrypoint.
+//===----------------------------------------------------------------------===//
 
+// LinkModules - This function links two modules together, with the resulting
+// left module modified to be the composite of the two input modules.  If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem.  Upon failure, the Dest module could be in a modified state, and
+// shouldn't be relied on to be consistent.
+bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+  ModuleLinker TheLinker(Dest, Src);
+  if (TheLinker.run()) {
+    if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
+    return true;
+  }
+  
   return false;
 }
-
-// vim: sw=2
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a77ecd3bd8ad..22afa7e91cbe 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -28,12 +28,14 @@ add_llvm_library(LLVMMC
   MCSectionELF.cpp
   MCSectionMachO.cpp
   MCStreamer.cpp
+  MCSubtargetInfo.cpp
   MCSymbol.cpp
   MCValue.cpp
   MCWin64EH.cpp
   MachObjectWriter.cpp
   WinCOFFStreamer.cpp
   WinCOFFObjectWriter.cpp
+  SubtargetFeature.cpp
   TargetAsmBackend.cpp
   )
 
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 73b259eaa0fe..502b60b0edf4 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -23,6 +23,9 @@
 using namespace llvm;
 
 MCAsmInfo::MCAsmInfo() {
+  PointerSize = 4;
+  IsLittleEndian = true;
+  StackGrowsUp = false;
   HasSubsectionsViaSymbols = false;
   HasMachoZeroFillDirective = false;
   HasMachoTBSSDirective = false;
@@ -78,6 +81,7 @@ MCAsmInfo::MCAsmInfo() {
   DwarfRequiresRelocationForSectionOffset = true;
   DwarfSectionOffsetDirective = 0;
   DwarfUsesLabelOffsetForRanges = true;
+  DwarfRegNumForCFI = false;
   HasMicrosoftFastStdCallMangling = false;
 
   AsmTransCBE = 0;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e8b09fcaced8..d5d08e8f69fb 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -33,8 +34,10 @@ using namespace llvm;
 namespace {
 
 class MCAsmStreamer : public MCStreamer {
+protected:
   formatted_raw_ostream &OS;
   const MCAsmInfo &MAI;
+private:
   OwningPtr<MCInstPrinter> InstPrinter;
   OwningPtr<MCCodeEmitter> Emitter;
   OwningPtr<TargetAsmBackend> AsmBackend;
@@ -134,7 +137,8 @@ public:
   virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
   virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                         const MCSymbol *LastLabel,
-                                        const MCSymbol *Label);
+                                        const MCSymbol *Label,
+                                        unsigned PointerSize);
   virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
                                          const MCSymbol *Label);
 
@@ -361,9 +365,9 @@ void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
 
 void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                              const MCSymbol *LastLabel,
-                                             const MCSymbol *Label) {
-  EmitDwarfSetLineAddr(LineDelta, Label,
-                       getContext().getTargetAsmInfo().getPointerSize());
+                                             const MCSymbol *Label,
+                                             unsigned PointerSize) {
+  EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
 }
 
 void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
@@ -600,7 +604,7 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
     int64_t IntValue;
     if (!Value->EvaluateAsAbsolute(IntValue))
       report_fatal_error("Don't know how to emit this value.");
-    if (getContext().getTargetAsmInfo().isLittleEndian()) {
+    if (getContext().getAsmInfo().isLittleEndian()) {
       EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
       EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
     } else {
@@ -822,9 +826,9 @@ void MCAsmStreamer::EmitCFIEndProc() {
 }
 
 void MCAsmStreamer::EmitRegisterName(int64_t Register) {
-  if (InstPrinter) {
-    const TargetAsmInfo &asmInfo = getContext().getTargetAsmInfo();
-    unsigned LLVMRegister = asmInfo.getLLVMRegNum(Register, true);
+  if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
+    const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+    unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true);
     InstPrinter->printRegName(OS, LLVMRegister);
   } else {
     OS << Register;
@@ -1085,7 +1089,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
     }
   }
 
-  // FIXME: Node the fixup comments for Thumb2 are completely bogus since the
+  // FIXME: Note the fixup comments for Thumb2 are completely bogus since the
   // high order halfword of a 32-bit Thumb2 instruction is emitted first.
   OS << "encoding: [";
   for (unsigned i = 0, e = Code.size(); i != e; ++i) {
@@ -1120,7 +1124,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
         unsigned Bit = (Code[i] >> j) & 1;
 
         unsigned FixupBit;
-        if (getContext().getTargetAsmInfo().isLittleEndian())
+        if (getContext().getAsmInfo().isLittleEndian())
           FixupBit = i * 8 + j;
         else
           FixupBit = i * 8 + (7-j);
@@ -1241,13 +1245,12 @@ void MCAsmStreamer::Finish() {
   if (!UseCFI)
     EmitFrames(false);
 }
-
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     formatted_raw_ostream &OS,
                                     bool isVerboseAsm, bool useLoc,
-                                    bool useCFI,
-                                    MCInstPrinter *IP, MCCodeEmitter *CE,
-                                    TargetAsmBackend *TAB, bool ShowInst) {
+                                    bool useCFI, MCInstPrinter *IP,
+                                    MCCodeEmitter *CE, TargetAsmBackend *TAB,
+                                    bool ShowInst) {
   return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
                            IP, CE, TAB, ShowInst);
 }
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 6e636f07f1d1..5480b4b12b2c 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -40,6 +40,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
   llvm::InitializeAllTargetInfos();
   // FIXME: We shouldn't need to initialize the Target(Machine)s.
   llvm::InitializeAllTargets();
+  llvm::InitializeAllMCAsmInfos();
   llvm::InitializeAllAsmPrinters();
   llvm::InitializeAllAsmParsers();
   llvm::InitializeAllDisassemblers();
@@ -50,16 +51,18 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
   assert(TheTarget && "Unable to create target!");
 
   // Get the assembler info needed to setup the MCContext.
-  const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+  const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
   assert(MAI && "Unable to create target asm info!");
 
   // Package up features to be passed to target/subtarget
   std::string FeaturesStr;
+  std::string CPU;
 
   // FIXME: We shouldn't need to do this (and link in codegen).
   //        When we split this out, we should do it in a way that makes
   //        it straightforward to switch subtargets on the fly.
-  TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr);
+  TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU,
+                                                     FeaturesStr);
   assert(TM && "Unable to create target machine!");
 
   // Get the target assembler info needed to setup the context.
@@ -77,7 +80,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
 
   // Set up the instruction printer.
   int AsmPrinterVariant = MAI->getAssemblerDialect();
-  MCInstPrinter *IP = TheTarget->createMCInstPrinter(*TM, AsmPrinterVariant,
+  MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
                                                      *MAI);
   assert(IP && "Unable to create instruction printer!");
 
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 91c5284892a5..bdd99afe1ae4 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -23,6 +23,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -106,6 +107,7 @@ void EDDisassembler::initialize() {
   
   InitializeAllTargetInfos();
   InitializeAllTargets();
+  InitializeAllMCAsmInfos();
   InitializeAllAsmPrinters();
   InitializeAllAsmParsers();
   InitializeAllDisassemblers();
@@ -167,11 +169,11 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
   if (!Tgt)
     return;
   
+  std::string CPU;
   std::string featureString;
-  
-  TargetMachine.reset(Tgt->createTargetMachine(tripleString,
+  TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
                                                featureString));
-  
+
   const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
   
   if (!registerInfo)
@@ -179,11 +181,11 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
     
   initMaps(*registerInfo);
   
-  AsmInfo.reset(Tgt->createAsmInfo(tripleString));
+  AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
   
   if (!AsmInfo)
     return;
-  
+
   Disassembler.reset(Tgt->createMCDisassembler());
   
   if (!Disassembler)
@@ -193,8 +195,7 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
   
   InstString.reset(new std::string);
   InstStream.reset(new raw_string_ostream(*InstString));
-  InstPrinter.reset(Tgt->createMCInstPrinter(*TargetMachine, LLVMSyntaxVariant,
-                                             *AsmInfo));
+  InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
   
   if (!InstPrinter)
     return;
@@ -372,8 +373,11 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
   OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
                                                          context, *streamer,
                                                          *AsmInfo));
-  OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
-                                                               *TargetMachine));
+
+  StringRef triple = tripleFromArch(Key.Arch);
+  OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
+  OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
+                                                               *genericParser));
   
   AsmToken OpcodeToken = genericParser->Lex();
   AsmToken NextToken = genericParser->Lex();  // consume next token, because specificParser expects us to
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 2fcc09d4bef0..11d69c151cf9 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -41,6 +41,7 @@ class MCInstPrinter;
 class MCInst;
 class MCParsedAsmOperand;
 class MCStreamer;
+class MCSubtargetInfo;
 template <typename T> class SmallVectorImpl;
 class SourceMgr;
 class Target;
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
index ad5728263133..e43ad1635246 100644
--- a/lib/MC/MCDisassembler/EDInfo.h
+++ b/lib/MC/MCDisassembler/EDInfo.h
@@ -25,8 +25,11 @@ enum OperandTypes {
   kOperandTypeARMBranchTarget,
   kOperandTypeARMSoReg,
   kOperandTypeARMSoImm,
+  kOperandTypeARMRotImm,
   kOperandTypeARMSoImm2Part,
   kOperandTypeARMPredicate,
+  kOperandTypeAddrModeImm12,
+  kOperandTypeLdStSOReg,
   kOperandTypeARMAddrMode2,
   kOperandTypeARMAddrMode2Offset,
   kOperandTypeARMAddrMode3,
@@ -38,13 +41,20 @@ enum OperandTypes {
   kOperandTypeARMAddrMode7,
   kOperandTypeARMAddrModePC,
   kOperandTypeARMRegisterList,
+  kOperandTypeARMDPRRegisterList,
+  kOperandTypeARMSPRRegisterList,
   kOperandTypeARMTBAddrMode,
   kOperandTypeThumbITMask,
-  kOperandTypeThumbAddrModeS1,
-  kOperandTypeThumbAddrModeS2,
-  kOperandTypeThumbAddrModeS4,
+  kOperandTypeThumbAddrModeRegS1,
+  kOperandTypeThumbAddrModeRegS2,
+  kOperandTypeThumbAddrModeRegS4,
+  kOperandTypeThumbAddrModeImmS1,
+  kOperandTypeThumbAddrModeImmS2,
+  kOperandTypeThumbAddrModeImmS4,
   kOperandTypeThumbAddrModeRR,
   kOperandTypeThumbAddrModeSP,
+  kOperandTypeThumbAddrModePC,
+  kOperandTypeThumb2AddrModeReg,
   kOperandTypeThumb2SoReg,
   kOperandTypeThumb2SoImm,
   kOperandTypeThumb2AddrModeImm8,
@@ -52,8 +62,7 @@ enum OperandTypes {
   kOperandTypeThumb2AddrModeImm12,
   kOperandTypeThumb2AddrModeSoReg,
   kOperandTypeThumb2AddrModeImm8s4,
-  kOperandTypeThumb2AddrModeImm8s4Offset,
-  kOperandTypeThumb2AddrModeReg
+  kOperandTypeThumb2AddrModeImm8s4Offset
 };
 
 enum OperandFlags {
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index 492bb08f336a..6a4e56ff72c4 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -61,11 +61,14 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
     switch (operandType) {
     default:
     case kOperandTypeARMRegisterList:
+    case kOperandTypeARMDPRRegisterList:
+    case kOperandTypeARMSPRRegisterList:
       break;
     case kOperandTypeImmediate:
     case kOperandTypeRegister:
     case kOperandTypeARMBranchTarget:
     case kOperandTypeARMSoImm:
+    case kOperandTypeARMRotImm:
     case kOperandTypeThumb2SoImm:
     case kOperandTypeARMSoImm2Part:
     case kOperandTypeARMPredicate:
@@ -78,6 +81,7 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
       numMCOperands = 1;
       break;
     case kOperandTypeThumb2SoReg:
+    case kOperandTypeAddrModeImm12:
     case kOperandTypeARMAddrMode2Offset:
     case kOperandTypeARMAddrMode3Offset:
     case kOperandTypeARMAddrMode4:
@@ -86,17 +90,22 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
     case kOperandTypeThumb2AddrModeImm8:
     case kOperandTypeThumb2AddrModeImm12:
     case kOperandTypeThumb2AddrModeImm8s4:
+    case kOperandTypeThumbAddrModeImmS1:
+    case kOperandTypeThumbAddrModeImmS2:
+    case kOperandTypeThumbAddrModeImmS4:
     case kOperandTypeThumbAddrModeRR:
     case kOperandTypeThumbAddrModeSP:
+    case kOperandTypeThumbAddrModePC:
       numMCOperands = 2;
       break;
     case kOperandTypeARMSoReg:
+    case kOperandTypeLdStSOReg:
     case kOperandTypeARMAddrMode2:
     case kOperandTypeARMAddrMode3:
     case kOperandTypeThumb2AddrModeSoReg:
-    case kOperandTypeThumbAddrModeS1:
-    case kOperandTypeThumbAddrModeS2:
-    case kOperandTypeThumbAddrModeS4:
+    case kOperandTypeThumbAddrModeRegS1:
+    case kOperandTypeThumbAddrModeRegS2:
+    case kOperandTypeThumbAddrModeRegS4:
     case kOperandTypeARMAddrMode6Offset:
       numMCOperands = 3;
       break;
@@ -270,9 +279,9 @@ int EDOperand::isMemory() {
   case kOperandTypeARMAddrMode7:
   case kOperandTypeARMAddrModePC:
   case kOperandTypeARMBranchTarget:
-  case kOperandTypeThumbAddrModeS1:
-  case kOperandTypeThumbAddrModeS2:
-  case kOperandTypeThumbAddrModeS4:
+  case kOperandTypeThumbAddrModeRegS1:
+  case kOperandTypeThumbAddrModeRegS2:
+  case kOperandTypeThumbAddrModeRegS4:
   case kOperandTypeThumbAddrModeRR:
   case kOperandTypeThumbAddrModeSP:
   case kOperandTypeThumb2SoImm:
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 13cb81ab441b..ad86db13d510 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -7,22 +7,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/FoldingSet.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectWriter.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
 #include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 // Given a special op, return the address skip amount (in units of
@@ -30,28 +29,27 @@ using namespace llvm;
 #define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
 
 // The maximum address skip amount that can be encoded with a special op.
-#define MAX_SPECIAL_ADDR_DELTA		SPECIAL_ADDR(255)
+#define MAX_SPECIAL_ADDR_DELTA         SPECIAL_ADDR(255)
 
 // First special line opcode - leave room for the standard opcodes.
 // Note: If you want to change this, you'll have to update the
 // "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().  
-#define DWARF2_LINE_OPCODE_BASE		13
+#define DWARF2_LINE_OPCODE_BASE         13
 
 // Minimum line offset in a special line info. opcode.  This value
 // was chosen to give a reasonable range of values.
-#define DWARF2_LINE_BASE		-5
+#define DWARF2_LINE_BASE                -5
 
 // Range of line offsets in a special line info. opcode.
-# define DWARF2_LINE_RANGE		14
+#define DWARF2_LINE_RANGE               14
 
 // Define the architecture-dependent minimum instruction length (in bytes).
 // This value should be rather too small than too big.
-# define DWARF2_LINE_MIN_INSN_LENGTH	1
+#define DWARF2_LINE_MIN_INSN_LENGTH     1
 
 // Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting,
 // this routine is a nop and will be optimized away.
-static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta)
-{
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) {
   if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
     return AddrDelta;
   if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
@@ -174,7 +172,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
     // At this point we want to emit/create the sequence to encode the delta in
     // line numbers and the increment of the address from the previous Label
     // and the current Label.
-    MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+    const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+    MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+                                   asmInfo.getPointerSize());
 
     LastLine = it->getLine();
     LastLabel = Label;
@@ -198,7 +198,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
   // Switch back the the dwarf line section.
   MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
 
-  MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd);
+  const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+  MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
+                                 asmInfo.getPointerSize());
 }
 
 //
@@ -291,7 +293,7 @@ void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
   const std::vector<const MCSection *> &MCLineSectionOrder =
     MCOS->getContext().getMCLineSectionOrder();
   for (std::vector<const MCSection*>::const_iterator it =
-	MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+         MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
        ++it) {
     const MCSection *Sec = *it;
     const MCLineSection *Line = MCLineSections.lookup(Sec);
@@ -354,10 +356,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
       OS << char(dwarf::DW_LNS_const_add_pc);
     else {
       OS << char(dwarf::DW_LNS_advance_pc);
-      SmallString<32> Tmp;
-      raw_svector_ostream OSE(Tmp);
-      MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
-      OS << OSE.str();
+      MCObjectWriter::EncodeULEB128(AddrDelta, OS);
     }
     OS << char(dwarf::DW_LNS_extended_op);
     OS << char(1);
@@ -432,25 +431,24 @@ void MCDwarfFile::dump() const {
 
 static int getDataAlignmentFactor(MCStreamer &streamer) {
   MCContext &context = streamer.getContext();
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  const MCAsmInfo &asmInfo = context.getAsmInfo();
   int size = asmInfo.getPointerSize();
-  if (asmInfo.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+  if (asmInfo.isStackGrowthDirectionUp())
     return size;
- else
-   return -size;
+  else
+    return -size;
 }
 
 static unsigned getSizeForEncoding(MCStreamer &streamer,
                                    unsigned symbolEncoding) {
   MCContext &context = streamer.getContext();
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
   unsigned format = symbolEncoding & 0x0f;
   switch (format) {
   default:
     assert(0 && "Unknown Encoding");
   case dwarf::DW_EH_PE_absptr:
   case dwarf::DW_EH_PE_signed:
-    return asmInfo.getPointerSize();
+    return context.getAsmInfo().getPointerSize();
   case dwarf::DW_EH_PE_udata2:
   case dwarf::DW_EH_PE_sdata2:
     return 2;
@@ -464,13 +462,14 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
 }
 
 static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
-                       unsigned symbolEncoding) {
+                       unsigned symbolEncoding, const char *comment = 0) {
   MCContext &context = streamer.getContext();
   const MCAsmInfo &asmInfo = context.getAsmInfo();
   const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
                                                 symbolEncoding,
                                                 streamer);
   unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+  if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
   streamer.EmitAbsValue(v, size);
 }
 
@@ -486,11 +485,11 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
 }
 
 static const MachineLocation TranslateMachineLocation(
-                                                  const TargetAsmInfo &AsmInfo,
+                                                  const TargetAsmInfo &TAI,
                                                   const MachineLocation &Loc) {
   unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
     MachineLocation::VirtualFP :
-    unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true));
+    unsigned(TAI.getDwarfRegNum(Loc.getReg(), true));
   const MachineLocation &NewLoc = Loc.isReg() ?
     MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
   return NewLoc;
@@ -503,13 +502,18 @@ namespace {
     bool UsingCFI;
     bool IsEH;
     const MCSymbol *SectionStart;
-
   public:
     FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart) :
       CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
       SectionStart(sectionStart) {
     }
 
+    /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+    /// we're successful, return 'true'. Otherwise, return 'false' and it will
+    /// emit the normal CIE and FDE.
+    bool EmitCompactUnwind(MCStreamer &streamer,
+                           const MCDwarfFrameInfo &frame);
+
     const MCSymbol &EmitCIE(MCStreamer &streamer,
                             const MCSymbol *personality,
                             unsigned personalityEncoding,
@@ -524,11 +528,46 @@ namespace {
     void EmitCFIInstruction(MCStreamer &Streamer,
                             const MCCFIInstruction &Instr);
   };
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+                             StringRef Prefix) {
+  if (Streamer.isVerboseAsm()) {
+    const char *EncStr = 0;
+    switch (Encoding) { // Each label must break, or the last name always wins.
+    default: EncStr = "<unknown encoding>"; break;
+    case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+    case dwarf::DW_EH_PE_omit:   EncStr = "omit"; break;
+    case dwarf::DW_EH_PE_pcrel:  EncStr = "pcrel"; break;
+    case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+    case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+    case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+    case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8"; break;
+    case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+      EncStr = "indirect pcrel udata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+      EncStr = "indirect pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+      EncStr = "indirect pcrel udata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+      EncStr = "indirect pcrel sdata8"; break;
+    }
+    // Annotate the byte emitted below as "<Prefix> = <encoding name>".
+    Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+  }
+
+  Streamer.EmitIntValue(Encoding, 1);
 }
 
 void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
                                           const MCCFIInstruction &Instr) {
   int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+  bool VerboseAsm = Streamer.isVerboseAsm();
 
   switch (Instr.getOperation()) {
   case MCCFIInstruction::Move:
@@ -540,9 +579,13 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
     // If advancing cfa.
     if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
       if (Src.getReg() == MachineLocation::VirtualFP) {
+        if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset");
         Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
       } else {
+        if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa");
         Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+        if (VerboseAsm) Streamer.AddComment(Twine("Reg ") +
+                                            Twine(Src.getReg()));
         Streamer.EmitULEB128IntValue(Src.getReg());
       }
 
@@ -551,47 +594,62 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
       else
         CFAOffset = -Src.getOffset();
 
+      if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
       Streamer.EmitULEB128IntValue(CFAOffset);
       return;
     }
 
     if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
       assert(Dst.isReg() && "Machine move not supported yet.");
+      if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register");
       Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+      if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg()));
       Streamer.EmitULEB128IntValue(Dst.getReg());
       return;
     }
 
     unsigned Reg = Src.getReg();
-
     int Offset = Dst.getOffset();
     if (IsRelative)
       Offset -= CFAOffset;
     Offset = Offset / dataAlignmentFactor;
 
     if (Offset < 0) {
+      if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
       Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+      if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
       Streamer.EmitULEB128IntValue(Reg);
+      if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
       Streamer.EmitSLEB128IntValue(Offset);
     } else if (Reg < 64) {
+      if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
+                                          Twine(Reg) + ")");
       Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+      if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
       Streamer.EmitULEB128IntValue(Offset);
     } else {
+      if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
       Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+      if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
       Streamer.EmitULEB128IntValue(Reg);
+      if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
       Streamer.EmitULEB128IntValue(Offset);
     }
     return;
   }
   case MCCFIInstruction::Remember:
+    if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
     Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
     return;
   case MCCFIInstruction::Restore:
+    if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
     Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
     return;
   case MCCFIInstruction::SameValue: {
     unsigned Reg = Instr.getDestination().getReg();
+    if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
     Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
+    if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
     Streamer.EmitULEB128IntValue(Reg);
     return;
   }
@@ -614,6 +672,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
     if (BaseLabel && Label) {
       MCSymbol *ThisSym = Label;
       if (ThisSym != BaseLabel) {
+        if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
         streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
         BaseLabel = ThisSym;
       }
@@ -623,40 +682,128 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
   }
 }
 
+/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
+/// successful, return 'true'. Otherwise, return 'false' and it will emit the
+/// normal CIE and FDE.
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+                                         const MCDwarfFrameInfo &Frame) {
+#if 1
+  return false;
+#else
+  MCContext &Context = Streamer.getContext();
+  const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+  bool VerboseAsm = Streamer.isVerboseAsm();
+
+  // range-start range-length  compact-unwind-enc personality-func   lsda
+  //  _foo       LfooEnd-_foo  0x00000023          0                 0
+  //  _bar       LbarEnd-_bar  0x00000025         __gxx_personality  except_tab1
+  //
+  //   .section __LD,__compact_unwind,regular,debug
+  //
+  //   # compact unwind for _foo
+  //   .quad _foo
+  //   .set L1,LfooEnd-_foo
+  //   .long L1
+  //   .long 0x01010001
+  //   .quad 0
+  //   .quad 0
+  //
+  //   # compact unwind for _bar
+  //   .quad _bar
+  //   .set L2,LbarEnd-_bar
+  //   .long L2
+  //   .long 0x01020011
+  //   .quad __gxx_personality
+  //   .quad except_tab1
+
+  uint32_t Encoding =
+    TAI.getCompactUnwindEncoding(Frame.Instructions,
+                                 getDataAlignmentFactor(Streamer), IsEH);
+  if (!Encoding) return false;
+
+  // The encoding needs to know we have an LSDA.
+  if (Frame.Lsda)
+    Encoding |= 0x40000000;
+
+  Streamer.SwitchSection(TAI.getCompactUnwindSection());
+
+  // Range Start
+  unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI);
+  unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
+  if (VerboseAsm) Streamer.AddComment("Range Start");
+  Streamer.EmitSymbolValue(Frame.Function, Size);
+
+  // Range Length
+  const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
+                                              *Frame.End, 0);
+  if (VerboseAsm) Streamer.AddComment("Range Length");
+  Streamer.EmitAbsValue(Range, 4);
+
+  // Compact Encoding
+  Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
+  if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") +
+                                      Twine(llvm::utohexstr(Encoding)));
+  Streamer.EmitIntValue(Encoding, Size);
+
+  // Personality Function
+  Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
+  if (VerboseAsm) Streamer.AddComment("Personality Function");
+  if (Frame.Personality)
+    Streamer.EmitSymbolValue(Frame.Personality, Size);
+  else
+    Streamer.EmitIntValue(0, Size); // No personality fn
+
+  // LSDA
+  Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
+  if (VerboseAsm) Streamer.AddComment("LSDA");
+  if (Frame.Lsda)
+    Streamer.EmitSymbolValue(Frame.Lsda, Size);
+  else
+    Streamer.EmitIntValue(0, Size); // No LSDA
+
+  return true;
+#endif
+}
+
 const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
                                           const MCSymbol *personality,
                                           unsigned personalityEncoding,
                                           const MCSymbol *lsda,
                                           unsigned lsdaEncoding) {
   MCContext &context = streamer.getContext();
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+  bool verboseAsm = streamer.isVerboseAsm();
 
   MCSymbol *sectionStart;
-  if (asmInfo.isFunctionEHFrameSymbolPrivate() || !IsEH)
+  if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH)
     sectionStart = context.CreateTempSymbol();
   else
     sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
 
+  streamer.EmitLabel(sectionStart);
   CIENum++;
 
-  MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol();
+  MCSymbol *sectionEnd = context.CreateTempSymbol();
 
   // Length
   const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
                                                *sectionEnd, 4);
-  streamer.EmitLabel(sectionStart);
+  if (verboseAsm) streamer.AddComment("CIE Length");
   streamer.EmitAbsValue(Length, 4);
 
   // CIE ID
   unsigned CIE_ID = IsEH ? 0 : -1;
+  if (verboseAsm) streamer.AddComment("CIE ID Tag");
   streamer.EmitIntValue(CIE_ID, 4);
 
   // Version
+  if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
   streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
 
   // Augmentation String
   SmallString<8> Augmentation;
   if (IsEH) {
+    if (verboseAsm) streamer.AddComment("CIE Augmentation");
     Augmentation += "z";
     if (personality)
       Augmentation += "P";
@@ -668,13 +815,16 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
   streamer.EmitIntValue(0, 1);
 
   // Code Alignment Factor
+  if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
   streamer.EmitULEB128IntValue(1);
 
   // Data Alignment Factor
+  if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
   streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
 
   // Return Address Register
-  streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true));
+  if (verboseAsm) streamer.AddComment("CIE Return Address Column");
+  streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true));
 
   // Augmentation Data Length (optional)
 
@@ -691,32 +841,38 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
     // Encoding of the FDE pointers
     augmentationLength += 1;
 
+    if (verboseAsm) streamer.AddComment("Augmentation Size");
     streamer.EmitULEB128IntValue(augmentationLength);
 
     // Augmentation Data (optional)
     if (personality) {
       // Personality Encoding
-      streamer.EmitIntValue(personalityEncoding, 1);
+      EmitEncodingByte(streamer, personalityEncoding,
+                       "Personality Encoding");
       // Personality
+      if (verboseAsm) streamer.AddComment("Personality");
       EmitPersonality(streamer, *personality, personalityEncoding);
     }
+
     if (lsda)
-      streamer.EmitIntValue(lsdaEncoding, 1); // LSDA Encoding
+      EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
+
     // Encoding of the FDE pointers
-    streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1);
+    EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI),
+                     "FDE Encoding");
   }
 
   // Initial Instructions
 
-  const std::vector<MachineMove> Moves = asmInfo.getInitialFrameState();
+  const std::vector<MachineMove> &Moves = TAI.getInitialFrameState();
   std::vector<MCCFIInstruction> Instructions;
 
   for (int i = 0, n = Moves.size(); i != n; ++i) {
     MCSymbol *Label = Moves[i].getLabel();
     const MachineLocation &Dst =
-      TranslateMachineLocation(asmInfo, Moves[i].getDestination());
+      TranslateMachineLocation(TAI, Moves[i].getDestination());
     const MachineLocation &Src =
-      TranslateMachineLocation(asmInfo, Moves[i].getSource());
+      TranslateMachineLocation(TAI, Moves[i].getSource());
     MCCFIInstruction Inst(Label, Dst, Src);
     Instructions.push_back(Inst);
   }
@@ -724,7 +880,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
   EmitCFIInstructions(streamer, Instructions, NULL);
 
   // Padding
-  streamer.EmitValueToAlignment(IsEH ? 4 : asmInfo.getPointerSize());
+  streamer.EmitValueToAlignment(IsEH
+                                ? 4 : context.getAsmInfo().getPointerSize());
 
   streamer.EmitLabel(sectionEnd);
   return *sectionStart;
@@ -736,17 +893,19 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
   MCContext &context = streamer.getContext();
   MCSymbol *fdeStart = context.CreateTempSymbol();
   MCSymbol *fdeEnd = context.CreateTempSymbol();
-  const TargetAsmInfo &TAsmInfo = context.getTargetAsmInfo();
+  const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+  bool verboseAsm = streamer.isVerboseAsm();
 
-  if (!TAsmInfo.isFunctionEHFrameSymbolPrivate() && IsEH) {
-    MCSymbol *EHSym = context.GetOrCreateSymbol(
-      frame.Function->getName() + Twine(".eh"));
+  if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) {
+    MCSymbol *EHSym =
+      context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
     streamer.EmitEHSymAttributes(frame.Function, EHSym);
     streamer.EmitLabel(EHSym);
   }
 
   // Length
   const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+  if (verboseAsm) streamer.AddComment("FDE Length");
   streamer.EmitAbsValue(Length, 4);
 
   streamer.EmitLabel(fdeStart);
@@ -756,6 +915,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
   if (IsEH) {
     const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
                                                  0);
+    if (verboseAsm) streamer.AddComment("FDE CIE Offset");
     streamer.EmitAbsValue(offset, 4);
   } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) {
     const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
@@ -764,18 +924,20 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
   } else {
     streamer.EmitSymbolValue(&cieStart, 4);
   }
-  unsigned fdeEncoding = TAsmInfo.getFDEEncoding(UsingCFI);
+
+  unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI);
   unsigned size = getSizeForEncoding(streamer, fdeEncoding);
 
   // PC Begin
   unsigned PCBeginEncoding = IsEH ? fdeEncoding :
     (unsigned)dwarf::DW_EH_PE_absptr;
   unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding);
-  EmitSymbol(streamer, *frame.Begin, PCBeginEncoding);
+  EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location");
 
   // PC Range
   const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
                                               *frame.End, 0);
+  if (verboseAsm) streamer.AddComment("FDE address range");
   streamer.EmitAbsValue(Range, size);
 
   if (IsEH) {
@@ -785,11 +947,13 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
     if (frame.Lsda)
       augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
 
+    if (verboseAsm) streamer.AddComment("Augmentation size");
     streamer.EmitULEB128IntValue(augmentationLength);
 
     // Augmentation Data
     if (frame.Lsda)
-      EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding);
+      EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
+                 "Language Specific Data Area");
   }
 
   // Call Frame Instructions
@@ -843,39 +1007,47 @@ namespace llvm {
   };
 }
 
-void MCDwarfFrameEmitter::Emit(MCStreamer &streamer,
-                               bool usingCFI,
-                               bool isEH) {
-  MCContext &context = streamer.getContext();
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
-  const MCSection &section = isEH ?
-    *asmInfo.getEHFrameSection() : *asmInfo.getDwarfFrameSection();
-  streamer.SwitchSection(&section);
-  MCSymbol *SectionStart = context.CreateTempSymbol();
-  streamer.EmitLabel(SectionStart);
-
-  MCSymbol *fdeEnd = NULL;
+void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
+                               bool UsingCFI,
+                               bool IsEH) {
+  MCContext &Context = Streamer.getContext();
+  const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+  const MCSection &Section = IsEH ? *TAI.getEHFrameSection() :
+                                    *TAI.getDwarfFrameSection();
+  Streamer.SwitchSection(&Section);
+  MCSymbol *SectionStart = Context.CreateTempSymbol();
+  Streamer.EmitLabel(SectionStart);
+
+  MCSymbol *FDEEnd = NULL;
   DenseMap<CIEKey, const MCSymbol*> CIEStarts;
-  FrameEmitterImpl Emitter(usingCFI, isEH, SectionStart);
+  FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart);
 
   const MCSymbol *DummyDebugKey = NULL;
-  for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
-    const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
-    CIEKey key(frame.Personality, frame.PersonalityEncoding,
-               frame.LsdaEncoding);
-    const MCSymbol *&cieStart = isEH ? CIEStarts[key] : DummyDebugKey;
-    if (!cieStart)
-      cieStart = &Emitter.EmitCIE(streamer, frame.Personality,
-                                  frame.PersonalityEncoding, frame.Lsda,
-                                  frame.LsdaEncoding);
-    fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame);
+  for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
+    const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+    if (IsEH && TAI.getCompactUnwindSection() &&
+        Emitter.EmitCompactUnwind(Streamer, Frame)) {
+      FDEEnd = NULL;
+      continue;
+    }
+
+    CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
+               Frame.LsdaEncoding);
+    const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
+    if (!CIEStart)
+      CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
+                                  Frame.PersonalityEncoding, Frame.Lsda,
+                                  Frame.LsdaEncoding);
+
+    FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+
     if (i != n - 1)
-      streamer.EmitLabel(fdeEnd);
+      Streamer.EmitLabel(FDEEnd);
   }
 
-  streamer.EmitValueToAlignment(asmInfo.getPointerSize());
-  if (fdeEnd)
-    streamer.EmitLabel(fdeEnd);
+  Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+  if (FDEEnd)
+    Streamer.EmitLabel(FDEEnd);
 }
 
 void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index bbb2789ea81c..49340edbed5e 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -26,7 +26,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
index db34d58ec600..855e7e9ca60f 100644
--- a/lib/MC/MCELFStreamer.h
+++ b/lib/MC/MCELFStreamer.h
@@ -138,137 +138,3 @@ private:
 } // end llvm namespace
 
 #endif
-//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file assembles .s files and emits ELF .o object files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCELFSTREAMER_H
-#define LLVM_MC_MCELFSTREAMER_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSectionELF.h"
-
-namespace llvm {
-
-class MCELFStreamer : public MCObjectStreamer {
-public:
-  MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
-                  raw_ostream &OS, MCCodeEmitter *Emitter)
-    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
-  ~MCELFStreamer() {}
-
-  /// @name MCStreamer Interface
-  /// @{
-
-  virtual void InitSections();
-  virtual void ChangeSection(const MCSection *Section);
-  virtual void EmitLabel(MCSymbol *Symbol);
-  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
-  virtual void EmitThumbFunc(MCSymbol *Func);
-  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
-  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
-  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                unsigned ByteAlignment);
-  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolType(int Type) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EndCOFFSymbolDef() {
-    assert(0 && "ELF doesn't support this directive");
-  }
-
-  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
-     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-     SD.setSize(Value);
-  }
-
-  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
-
-  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
-                            unsigned Size = 0, unsigned ByteAlignment = 0) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                              uint64_t Size, unsigned ByteAlignment = 0) {
-    assert(0 && "ELF doesn't support this directive");
-  }
-  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
-                                    unsigned ValueSize = 1,
-                                    unsigned MaxBytesToEmit = 0);
-  virtual void EmitCodeAlignment(unsigned ByteAlignment,
-                                 unsigned MaxBytesToEmit = 0);
-
-  virtual void EmitFileDirective(StringRef Filename);
-
-  virtual void Finish();
-
-private:
-  virtual void EmitInstToFragment(const MCInst &Inst);
-  virtual void EmitInstToData(const MCInst &Inst);
-
-  void fixSymbolsInTLSFixups(const MCExpr *expr);
-
-  struct LocalCommon {
-    MCSymbolData *SD;
-    uint64_t Size;
-    unsigned ByteAlignment;
-  };
-  std::vector<LocalCommon> LocalCommons;
-
-  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
-  /// @}
-  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
-                  SectionKind Kind) {
-    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
-  }
-
-  void SetSectionData() {
-    SetSection(".data", ELF::SHT_PROGBITS,
-               ELF::SHF_WRITE |ELF::SHF_ALLOC,
-               SectionKind::getDataRel());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionText() {
-    SetSection(".text", ELF::SHT_PROGBITS,
-               ELF::SHF_EXECINSTR |
-               ELF::SHF_ALLOC, SectionKind::getText());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionBss() {
-    SetSection(".bss", ELF::SHT_NOBITS,
-               ELF::SHF_WRITE |
-               ELF::SHF_ALLOC, SectionKind::getBSS());
-    EmitCodeAlignment(4, 0);
-  }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
index 46ea9b844a6a..309752ec5f02 100644
--- a/lib/MC/MCLoggingStreamer.cpp
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -85,9 +85,11 @@ public:
 
   virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                         const MCSymbol *LastLabel,
-                                        const MCSymbol *Label) {
+                                        const MCSymbol *Label,
+                                        unsigned PointerSize) {
     LogCall("EmitDwarfAdvanceLineAddr");
-    return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+    return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+                                           PointerSize);
   }
 
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 12aeb4f48fda..1b21249ca321 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index f38b82231207..9577af010205 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -44,7 +44,8 @@ namespace {
     virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
     virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                           const MCSymbol *LastLabel,
-                                          const MCSymbol *Label) {}
+                                          const MCSymbol *Label,
+                                          unsigned PointerSize) {}
 
     virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
 
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index e230c5330203..8635aac00302 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -18,7 +18,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
 using namespace llvm;
 
 MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -197,9 +196,9 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
 
 void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                                 const MCSymbol *LastLabel,
-                                                const MCSymbol *Label) {
+                                                const MCSymbol *Label,
+                                                unsigned PointerSize) {
   if (!LastLabel) {
-    int PointerSize = getContext().getTargetAsmInfo().getPointerSize();
     EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
     return;
   }
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 4f55cea7bc5e..0c181f39611e 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -28,6 +28,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
@@ -84,6 +85,7 @@ private:
   AsmLexer Lexer;
   MCContext &Ctx;
   MCStreamer &Out;
+  const MCAsmInfo &MAI;
   SourceMgr &SrcMgr;
   MCAsmParserExtension *GenericParser;
   MCAsmParserExtension *PlatformParser;
@@ -135,7 +137,7 @@ public:
   virtual MCContext &getContext() { return Ctx; }
   virtual MCStreamer &getStreamer() { return Out; }
 
-  virtual bool Warning(SMLoc L, const Twine &Meg);
+  virtual bool Warning(SMLoc L, const Twine &Msg);
   virtual bool Error(SMLoc L, const Twine &Msg);
 
   const AsmToken &Lex();
@@ -160,8 +162,9 @@ private:
   void HandleMacroExit();
 
   void PrintMacroInstantiations();
-  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
-    SrcMgr.PrintMessage(Loc, Msg, Type);
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+                    bool ShowLine = true) const {
+    SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
   }
 
   /// EnterIncludeFile - Enter the specified file. This returns true on failure.
@@ -337,7 +340,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
 
 AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
                      MCStreamer &_Out, const MCAsmInfo &_MAI)
-  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
+  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
     GenericParser(new GenericAsmParser), PlatformParser(0),
     CurBuffer(0), MacrosEnabled(true) {
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
@@ -466,6 +469,29 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
       TokError("unassigned file number: " + Twine(i) + " for .file directives");
   }
 
+  // Check to see that all assembler local symbols were actually defined.
+  // Targets that don't do subsections via symbols may not want this, though,
+  // so conservatively exclude them. Only do this if we're finalizing, though,
+  // as otherwise we won't necessarily have seen everything yet.
+  if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+    const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+    for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+         e = Symbols.end();
+         i != e; ++i) {
+      MCSymbol *Sym = i->getValue();
+      // Variable symbols may not be marked as defined, so check those
+      // explicitly. If we know it's a variable, we have a definition for
+      // the purposes of this check.
+      if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+        // FIXME: We would really like to refer back to where the symbol was
+        // first referenced for a source location. We need to add something
+        // to track that. Currently, we just point to the end of the file.
+        PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
+                     Sym->getName() + "' not defined", "error", false);
+    }
+  }
+
+
   // Finalize the output stream if there are no errors and if the client wants
   // us to.
   if (!HadError && !NoFinalize)
@@ -1121,7 +1147,7 @@ bool AsmParser::ParseStatement() {
     if (IDVal == ".weak_def_can_be_hidden")
       return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
 
-    if (IDVal == ".comm")
+    if (IDVal == ".comm" || IDVal == ".common")
       return ParseDirectiveComm(/*IsLocal=*/false);
     if (IDVal == ".lcomm")
       return ParseDirectiveComm(/*IsLocal=*/true);
@@ -1168,7 +1194,7 @@ bool AsmParser::ParseStatement() {
     for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
       if (i != 0)
         OS << ", ";
-      ParsedOperands[i]->dump(OS);
+      ParsedOperands[i]->print(OS);
     }
     OS << "]";
 
@@ -1587,13 +1613,18 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
 
     for (;;) {
       const MCExpr *Value;
+      SMLoc ExprLoc = getLexer().getLoc();
       if (ParseExpression(Value))
         return true;
 
       // Special case constant expressions to match code generator.
-      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
-        getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
-      else
+      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+        assert(Size <= 8 && "Invalid size");
+        uint64_t IntValue = MCE->getValue();
+        if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+          return Error(ExprLoc, "literal value out of range for directive");
+        getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE);
+      } else
         getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
 
       if (getLexer().is(AsmToken::EndOfStatement))
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 64f635517b11..66ad384c7db2 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -401,14 +401,14 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
 bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
   SMLoc startLoc = getLexer().getLoc();
   if (getLexer().is(AsmToken::Percent)) {
-    const TargetAsmInfo &asmInfo = getContext().getTargetAsmInfo();
+    const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
     SMLoc endLoc;
     unsigned LLVMRegNo;
     if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
       return true;
 
     // Check that this is a non-volatile register.
-    const unsigned *NVRegs = asmInfo.getCalleeSavedRegs();
+    const unsigned *NVRegs = TAI.getCalleeSavedRegs();
     unsigned i;
     for (i = 0; NVRegs[i] != 0; ++i)
       if (NVRegs[i] == LLVMRegNo)
@@ -416,7 +416,7 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
     if (NVRegs[i] == 0)
       return Error(startLoc, "expected non-volatile register");
 
-    int SEHRegNo = asmInfo.getSEHRegNum(LLVMRegNo);
+    int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo);
     if (SEHRegNo < 0)
       return Error(startLoc,"register can't be represented in SEH unwind info");
     RegNo = SEHRegNo;
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 70295efc613c..4030e41036aa 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -12,6 +12,8 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetAsmParser.h"
 using namespace llvm;
 
@@ -41,4 +43,6 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
   return ParseExpression(Res, L);
 }
 
-
+void MCParsedAsmOperand::dump() const {
+  dbgs() << "  " << *this;
+}
diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/TargetAsmParser.cpp
index 8d43c21f4bc9..512f6b044911 100644
--- a/lib/MC/MCParser/TargetAsmParser.cpp
+++ b/lib/MC/MCParser/TargetAsmParser.cpp
@@ -10,8 +10,8 @@
 #include "llvm/Target/TargetAsmParser.h"
 using namespace llvm;
 
-TargetAsmParser::TargetAsmParser(const Target &T) 
-  : TheTarget(T), AvailableFeatures(0)
+TargetAsmParser::TargetAsmParser()
+  : AvailableFeatures(0)
 {
 }
 
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index ae3ed0f3f61a..6e96b78e315b 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -15,7 +15,6 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include <cstdlib>
@@ -81,7 +80,7 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
   assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
          "Invalid size");
   char buf[8];
-  const bool isLittleEndian = Context.getTargetAsmInfo().isLittleEndian();
+  const bool isLittleEndian = Context.getAsmInfo().isLittleEndian();
   for (unsigned i = 0; i != Size; ++i) {
     unsigned index = isLittleEndian ? i : (Size - i - 1);
     buf[i] = uint8_t(Value >> (index * 8));
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
new file mode 100644
index 000000000000..86dc1083cee9
--- /dev/null
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -0,0 +1,96 @@
+//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+void
+MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+                                     const SubtargetFeatureKV *PF,
+                                     const SubtargetFeatureKV *PD,
+                                     const SubtargetInfoKV *PI,
+                                     const InstrStage *IS,
+                                     const unsigned *OC,
+                                     const unsigned *FP,
+                                     unsigned NF, unsigned NP) {
+  TargetTriple = TT;
+  ProcFeatures = PF;
+  ProcDesc = PD;
+  ProcItins = PI;
+  Stages = IS;
+  OperandCycles = OC;
+  ForwardingPathes = FP;
+  NumFeatures = NF;
+  NumProcs = NP;
+
+  SubtargetFeatures Features(FS);
+  FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+                                        ProcFeatures, NumFeatures);
+}
+
+
+/// ReInitMCSubtargetInfo - Change CPU (and optionally supplemented with
+/// feature string) and recompute feature bits.
+uint64_t MCSubtargetInfo::ReInitMCSubtargetInfo(StringRef CPU, StringRef FS) {
+  SubtargetFeatures Features(FS);
+  FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+                                        ProcFeatures, NumFeatures);
+  return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version does not change the implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+  FeatureBits ^= FB;
+  return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version will also change all implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
+  SubtargetFeatures Features;
+  FeatureBits = Features.ToggleFeature(FeatureBits, FS,
+                                       ProcFeatures, NumFeatures);
+  return FeatureBits;
+}
+
+
+InstrItineraryData
+MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
+  assert(ProcItins && "Instruction itineraries information not available!");
+
+#ifndef NDEBUG
+  for (size_t i = 1; i < NumProcs; i++) {
+    assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 &&
+           "Itineraries table is not sorted");
+  }
+#endif
+
+  // Find entry
+  SubtargetInfoKV KV;
+  KV.Key = CPU.data();
+  const SubtargetInfoKV *Found =
+    std::lower_bound(ProcItins, ProcItins+NumProcs, KV);
+  if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) {
+    errs() << "'" << CPU
+           << "' is not a recognized processor for this target"
+           << " (ignoring processor)\n";
+    return InstrItineraryData();
+  }
+
+  return InstrItineraryData(Stages, OperandCycles, ForwardingPathes,
+                            (InstrItinerary *)Found->Value);
+}
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index 9453f5c2a963..e698384a49f1 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -225,9 +225,9 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
   // Switch sections (the static function above is meant to be called from
   // here and from Emit().
   MCContext &context = streamer.getContext();
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  const TargetAsmInfo &TAI = context.getTargetAsmInfo();
   const MCSection *xdataSect =
-    asmInfo.getWin64EHTableSection(GetSectionSuffix(info->Function));
+    TAI.getWin64EHTableSection(GetSectionSuffix(info->Function));
   streamer.SwitchSection(xdataSect);
 
   llvm::EmitUnwindInfo(streamer, info);
@@ -236,11 +236,11 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
 void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
   MCContext &context = streamer.getContext();
   // Emit the unwind info structs first.
-  const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+  const TargetAsmInfo &TAI = context.getTargetAsmInfo();
   for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
     MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
     const MCSection *xdataSect =
-      asmInfo.getWin64EHTableSection(GetSectionSuffix(info.Function));
+      TAI.getWin64EHTableSection(GetSectionSuffix(info.Function));
     streamer.SwitchSection(xdataSect);
     llvm::EmitUnwindInfo(streamer, &info);
   }
@@ -248,7 +248,7 @@ void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
   for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
     MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
     const MCSection *pdataSect =
-      asmInfo.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
+      TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
     streamer.SwitchSection(pdataSect);
     EmitRuntimeFunction(streamer, &info);
   }
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index f049b1c6e2a4..69efe231ad6e 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -23,34 +23,12 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetAsmBackend.h"
 
-// FIXME: Gross.
-#include "../Target/ARM/ARMFixupKinds.h"
-#include "../Target/X86/X86FixupKinds.h"
-
 #include <vector>
 using namespace llvm;
 using namespace llvm::object;
 
-// FIXME: this has been copied from (or to) X86AsmBackend.cpp
-static unsigned getFixupKindLog2Size(unsigned Kind) {
-  switch (Kind) {
-  default:
-    llvm_unreachable("invalid fixup kind!");
-  case FK_PCRel_1:
-  case FK_Data_1: return 0;
-  case FK_PCRel_2:
-  case FK_Data_2: return 1;
-  case FK_PCRel_4:
-    // FIXME: Remove these!!!
-  case X86::reloc_riprel_4byte:
-  case X86::reloc_riprel_4byte_movq_load:
-  case X86::reloc_signed_4byte:
-  case FK_Data_4: return 2;
-  case FK_Data_8: return 3;
-  }
-}
-
-static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
+bool MachObjectWriter::
+doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
   // Undefined symbols are always extern.
   if (SD->Symbol->isUndefined())
     return true;
@@ -64,1557 +42,740 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
   return false;
 }
 
-namespace {
-
-class MachObjectWriter : public MCObjectWriter {
-  /// MachSymbolData - Helper struct for containing some precomputed information
-  /// on symbols.
-  struct MachSymbolData {
-    MCSymbolData *SymbolData;
-    uint64_t StringIndex;
-    uint8_t SectionIndex;
+bool MachObjectWriter::
+MachSymbolData::operator<(const MachSymbolData &RHS) const {
+  return SymbolData->getSymbol().getName() <
+    RHS.SymbolData->getSymbol().getName();
+}
 
-    // Support lexicographic sorting.
-    bool operator<(const MachSymbolData &RHS) const {
-      return SymbolData->getSymbol().getName() <
-             RHS.SymbolData->getSymbol().getName();
-    }
-  };
+bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+  const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+    (MCFixupKind) Kind);
 
-  /// The target specific Mach-O writer instance.
-  llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
+  return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
 
-  /// @name Relocation Data
-  /// @{
+uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
+                                              const MCAsmLayout &Layout) const {
+  return getSectionAddress(Fragment->getParent()) +
+    Layout.getFragmentOffset(Fragment);
+}
 
-  llvm::DenseMap<const MCSectionData*,
-                 std::vector<macho::RelocationEntry> > Relocations;
-  llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+                                            const MCAsmLayout &Layout) const {
+  const MCSymbol &S = SD->getSymbol();
+
+  // If this is a variable, then recursively evaluate now.
+  if (S.isVariable()) {
+    MCValue Target;
+    if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+      report_fatal_error("unable to evaluate offset for variable '" +
+                         S.getName() + "'");
+
+    // Verify that any used symbols are defined.
+    if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         Target.getSymA()->getSymbol().getName() + "'");
+    if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         Target.getSymB()->getSymbol().getName() + "'");
+
+    uint64_t Address = Target.getConstant();
+    if (Target.getSymA())
+      Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+                                    Target.getSymA()->getSymbol()), Layout);
+    if (Target.getSymB())
+      Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+                                    Target.getSymB()->getSymbol()), Layout);
+    return Address;
+  }
 
-  /// @}
-  /// @name Symbol Table Data
-  /// @{
+  return getSectionAddress(SD->getFragment()->getParent()) +
+    Layout.getSymbolOffset(SD);
+}
 
-  SmallString<256> StringTable;
-  std::vector<MachSymbolData> LocalSymbolData;
-  std::vector<MachSymbolData> ExternalSymbolData;
-  std::vector<MachSymbolData> UndefinedSymbolData;
+uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+                                          const MCAsmLayout &Layout) const {
+  uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+  unsigned Next = SD->getLayoutOrder() + 1;
+  if (Next >= Layout.getSectionOrder().size())
+    return 0;
+
+  const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+  if (NextSD.getSection().isVirtualSection())
+    return 0;
+  return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+}
 
-  /// @}
+void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+                                   unsigned LoadCommandsSize,
+                                   bool SubsectionsViaSymbols) {
+  uint32_t Flags = 0;
 
-private:
-  /// @name Utility Methods
-  /// @{
+  if (SubsectionsViaSymbols)
+    Flags |= macho::HF_SubsectionsViaSymbols;
 
-  bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
-    const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
-      (MCFixupKind) Kind);
+  // struct mach_header (28 bytes) or
+  // struct mach_header_64 (32 bytes)
 
-    return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
-  }
+  uint64_t Start = OS.tell();
+  (void) Start;
 
-  /// @}
+  Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
 
-  SectionAddrMap SectionAddress;
-  uint64_t getSectionAddress(const MCSectionData* SD) const {
-    return SectionAddress.lookup(SD);
-  }
-  uint64_t getSymbolAddress(const MCSymbolData* SD,
-                            const MCAsmLayout &Layout) const {
-    const MCSymbol &S = SD->getSymbol();
-
-    // If this is a variable, then recursively evaluate now.
-    if (S.isVariable()) {
-      MCValue Target;
-      if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
-        report_fatal_error("unable to evaluate offset for variable '" +
-                           S.getName() + "'");
-
-      // Verify that any used symbols are defined.
-      if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
-        report_fatal_error("unable to evaluate offset to undefined symbol '" +
-                           Target.getSymA()->getSymbol().getName() + "'");
-      if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
-        report_fatal_error("unable to evaluate offset to undefined symbol '" +
-                           Target.getSymB()->getSymbol().getName() + "'");
-
-      uint64_t Address = Target.getConstant();
-      if (Target.getSymA())
-        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
-                                      Target.getSymA()->getSymbol()), Layout);
-      if (Target.getSymB())
-        Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
-                                      Target.getSymB()->getSymbol()), Layout);
-      return Address;
-    }
+  Write32(TargetObjectWriter->getCPUType());
+  Write32(TargetObjectWriter->getCPUSubtype());
 
-    return getSectionAddress(SD->getFragment()->getParent()) +
-      Layout.getSymbolOffset(SD);
-  }
-  uint64_t getFragmentAddress(const MCFragment *Fragment,
-                            const MCAsmLayout &Layout) const {
-    return getSectionAddress(Fragment->getParent()) +
-      Layout.getFragmentOffset(Fragment);
-  }
+  Write32(macho::HFT_Object);
+  Write32(NumLoadCommands);
+  Write32(LoadCommandsSize);
+  Write32(Flags);
+  if (is64Bit())
+    Write32(0); // reserved
 
-  uint64_t getPaddingSize(const MCSectionData *SD,
-                          const MCAsmLayout &Layout) const {
-    uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
-    unsigned Next = SD->getLayoutOrder() + 1;
-    if (Next >= Layout.getSectionOrder().size())
-      return 0;
-
-    const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
-    if (NextSD.getSection().isVirtualSection())
-      return 0;
-    return OffsetToAlignment(EndAddr, NextSD.getAlignment());
-  }
+  assert(OS.tell() - Start ==
+         (is64Bit() ? macho::Header64Size : macho::Header32Size));
+}
 
-public:
-  MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
-                   bool _IsLittleEndian)
-    : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
+/// WriteSegmentLoadCommand - Write a segment load command.
+///
+/// \arg NumSections - The number of sections in this segment.
+/// \arg SectionDataSize - The total size of the sections.
+void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+                                               uint64_t VMSize,
+                                               uint64_t SectionDataStartOffset,
+                                               uint64_t SectionDataSize) {
+  // struct segment_command (56 bytes) or
+  // struct segment_command_64 (72 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  unsigned SegmentLoadCommandSize =
+    is64Bit() ? macho::SegmentLoadCommand64Size:
+    macho::SegmentLoadCommand32Size;
+  Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+  Write32(SegmentLoadCommandSize +
+          NumSections * (is64Bit() ? macho::Section64Size :
+                         macho::Section32Size));
+
+  WriteBytes("", 16);
+  if (is64Bit()) {
+    Write64(0); // vmaddr
+    Write64(VMSize); // vmsize
+    Write64(SectionDataStartOffset); // file offset
+    Write64(SectionDataSize); // file size
+  } else {
+    Write32(0); // vmaddr
+    Write32(VMSize); // vmsize
+    Write32(SectionDataStartOffset); // file offset
+    Write32(SectionDataSize); // file size
   }
+  Write32(0x7); // maxprot
+  Write32(0x7); // initprot
+  Write32(NumSections);
+  Write32(0); // flags
 
-  /// @name Target Writer Proxy Accessors
-  /// @{
+  assert(OS.tell() - Start == SegmentLoadCommandSize);
+}
 
-  bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
-  bool isARM() const {
-    uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask;
-    return CPUType == mach::CTM_ARM;
+void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+                                    const MCAsmLayout &Layout,
+                                    const MCSectionData &SD,
+                                    uint64_t FileOffset,
+                                    uint64_t RelocationsStart,
+                                    unsigned NumRelocations) {
+  uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+  // The offset is unused for virtual sections.
+  if (SD.getSection().isVirtualSection()) {
+    assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+    FileOffset = 0;
   }
 
-  /// @}
-
-  void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
-                   bool SubsectionsViaSymbols) {
-    uint32_t Flags = 0;
-
-    if (SubsectionsViaSymbols)
-      Flags |= macho::HF_SubsectionsViaSymbols;
-
-    // struct mach_header (28 bytes) or
-    // struct mach_header_64 (32 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
-
-    Write32(TargetObjectWriter->getCPUType());
-    Write32(TargetObjectWriter->getCPUSubtype());
-
-    Write32(macho::HFT_Object);
-    Write32(NumLoadCommands);
-    Write32(LoadCommandsSize);
-    Write32(Flags);
-    if (is64Bit())
-      Write32(0); // reserved
-
-    assert(OS.tell() - Start ==
-           (is64Bit() ? macho::Header64Size : macho::Header32Size));
+  // struct section (68 bytes) or
+  // struct section_64 (80 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+  WriteBytes(Section.getSectionName(), 16);
+  WriteBytes(Section.getSegmentName(), 16);
+  if (is64Bit()) {
+    Write64(getSectionAddress(&SD)); // address
+    Write64(SectionSize); // size
+  } else {
+    Write32(getSectionAddress(&SD)); // address
+    Write32(SectionSize); // size
   }
+  Write32(FileOffset);
+
+  unsigned Flags = Section.getTypeAndAttributes();
+  if (SD.hasInstructions())
+    Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+  assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+  Write32(Log2_32(SD.getAlignment()));
+  Write32(NumRelocations ? RelocationsStart : 0);
+  Write32(NumRelocations);
+  Write32(Flags);
+  Write32(IndirectSymBase.lookup(&SD)); // reserved1
+  Write32(Section.getStubSize()); // reserved2
+  if (is64Bit())
+    Write32(0); // reserved3
+
+  assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+                               macho::Section32Size));
+}
 
-  /// WriteSegmentLoadCommand - Write a segment load command.
-  ///
-  /// \arg NumSections - The number of sections in this segment.
-  /// \arg SectionDataSize - The total size of the sections.
-  void WriteSegmentLoadCommand(unsigned NumSections,
-                               uint64_t VMSize,
-                               uint64_t SectionDataStartOffset,
-                               uint64_t SectionDataSize) {
-    // struct segment_command (56 bytes) or
-    // struct segment_command_64 (72 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    unsigned SegmentLoadCommandSize =
-      is64Bit() ? macho::SegmentLoadCommand64Size:
-      macho::SegmentLoadCommand32Size;
-    Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
-    Write32(SegmentLoadCommandSize +
-            NumSections * (is64Bit() ? macho::Section64Size :
-                           macho::Section32Size));
-
-    WriteBytes("", 16);
-    if (is64Bit()) {
-      Write64(0); // vmaddr
-      Write64(VMSize); // vmsize
-      Write64(SectionDataStartOffset); // file offset
-      Write64(SectionDataSize); // file size
-    } else {
-      Write32(0); // vmaddr
-      Write32(VMSize); // vmsize
-      Write32(SectionDataStartOffset); // file offset
-      Write32(SectionDataSize); // file size
-    }
-    Write32(0x7); // maxprot
-    Write32(0x7); // initprot
-    Write32(NumSections);
-    Write32(0); // flags
+void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+                                              uint32_t NumSymbols,
+                                              uint32_t StringTableOffset,
+                                              uint32_t StringTableSize) {
+  // struct symtab_command (24 bytes)
 
-    assert(OS.tell() - Start == SegmentLoadCommandSize);
-  }
+  uint64_t Start = OS.tell();
+  (void) Start;
 
-  void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                    const MCSectionData &SD, uint64_t FileOffset,
-                    uint64_t RelocationsStart, unsigned NumRelocations) {
-    uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+  Write32(macho::LCT_Symtab);
+  Write32(macho::SymtabLoadCommandSize);
+  Write32(SymbolOffset);
+  Write32(NumSymbols);
+  Write32(StringTableOffset);
+  Write32(StringTableSize);
 
-    // The offset is unused for virtual sections.
-    if (SD.getSection().isVirtualSection()) {
-      assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
-      FileOffset = 0;
-    }
+  assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+}
 
-    // struct section (68 bytes) or
-    // struct section_64 (80 bytes)
+void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+                                                uint32_t NumLocalSymbols,
+                                                uint32_t FirstExternalSymbol,
+                                                uint32_t NumExternalSymbols,
+                                                uint32_t FirstUndefinedSymbol,
+                                                uint32_t NumUndefinedSymbols,
+                                                uint32_t IndirectSymbolOffset,
+                                                uint32_t NumIndirectSymbols) {
+  // struct dysymtab_command (80 bytes)
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  Write32(macho::LCT_Dysymtab);
+  Write32(macho::DysymtabLoadCommandSize);
+  Write32(FirstLocalSymbol);
+  Write32(NumLocalSymbols);
+  Write32(FirstExternalSymbol);
+  Write32(NumExternalSymbols);
+  Write32(FirstUndefinedSymbol);
+  Write32(NumUndefinedSymbols);
+  Write32(0); // tocoff
+  Write32(0); // ntoc
+  Write32(0); // modtaboff
+  Write32(0); // nmodtab
+  Write32(0); // extrefsymoff
+  Write32(0); // nextrefsyms
+  Write32(IndirectSymbolOffset);
+  Write32(NumIndirectSymbols);
+  Write32(0); // extreloff
+  Write32(0); // nextrel
+  Write32(0); // locreloff
+  Write32(0); // nlocrel
+
+  assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+}
 
-    uint64_t Start = OS.tell();
-    (void) Start;
+void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+                                  const MCAsmLayout &Layout) {
+  MCSymbolData &Data = *MSD.SymbolData;
+  const MCSymbol &Symbol = Data.getSymbol();
+  uint8_t Type = 0;
+  uint16_t Flags = Data.getFlags();
+  uint32_t Address = 0;
 
-    const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
-    WriteBytes(Section.getSectionName(), 16);
-    WriteBytes(Section.getSegmentName(), 16);
-    if (is64Bit()) {
-      Write64(getSectionAddress(&SD)); // address
-      Write64(SectionSize); // size
+  // Set the N_TYPE bits. See <mach-o/nlist.h>.
+  //
+  // FIXME: Are the prebound or indirect fields possible here?
+  if (Symbol.isUndefined())
+    Type = macho::STT_Undefined;
+  else if (Symbol.isAbsolute())
+    Type = macho::STT_Absolute;
+  else
+    Type = macho::STT_Section;
+
+  // FIXME: Set STAB bits.
+
+  if (Data.isPrivateExtern())
+    Type |= macho::STF_PrivateExtern;
+
+  // Set external bit.
+  if (Data.isExternal() || Symbol.isUndefined())
+    Type |= macho::STF_External;
+
+  // Compute the symbol address.
+  if (Symbol.isDefined()) {
+    if (Symbol.isAbsolute()) {
+      Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
     } else {
-      Write32(getSectionAddress(&SD)); // address
-      Write32(SectionSize); // size
+      Address = getSymbolAddress(&Data, Layout);
+    }
+  } else if (Data.isCommon()) {
+    // Common symbols are encoded with the size in the address
+    // field, and their alignment in the flags.
+    Address = Data.getCommonSize();
+
+    // Common alignment is packed into the 'desc' bits.
+    if (unsigned Align = Data.getCommonAlignment()) {
+      unsigned Log2Size = Log2_32(Align);
+      assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+      if (Log2Size > 15)
+        report_fatal_error("invalid 'common' alignment '" +
+                           Twine(Align) + "'");
+      // FIXME: Keep this mask with the SymbolFlags enumeration.
+      Flags = (Flags & 0xF0FF) | (Log2Size << 8);
     }
-    Write32(FileOffset);
-
-    unsigned Flags = Section.getTypeAndAttributes();
-    if (SD.hasInstructions())
-      Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
-
-    assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
-    Write32(Log2_32(SD.getAlignment()));
-    Write32(NumRelocations ? RelocationsStart : 0);
-    Write32(NumRelocations);
-    Write32(Flags);
-    Write32(IndirectSymBase.lookup(&SD)); // reserved1
-    Write32(Section.getStubSize()); // reserved2
-    if (is64Bit())
-      Write32(0); // reserved3
-
-    assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
-           macho::Section32Size));
   }
 
-  void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
-                              uint32_t StringTableOffset,
-                              uint32_t StringTableSize) {
-    // struct symtab_command (24 bytes)
+  // struct nlist (12 bytes) or struct nlist_64 (16 bytes)
 
-    uint64_t Start = OS.tell();
-    (void) Start;
+  Write32(MSD.StringIndex);
+  Write8(Type);
+  Write8(MSD.SectionIndex);
 
-    Write32(macho::LCT_Symtab);
-    Write32(macho::SymtabLoadCommandSize);
-    Write32(SymbolOffset);
-    Write32(NumSymbols);
-    Write32(StringTableOffset);
-    Write32(StringTableSize);
-
-    assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
-  }
-
-  void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
-                                uint32_t NumLocalSymbols,
-                                uint32_t FirstExternalSymbol,
-                                uint32_t NumExternalSymbols,
-                                uint32_t FirstUndefinedSymbol,
-                                uint32_t NumUndefinedSymbols,
-                                uint32_t IndirectSymbolOffset,
-                                uint32_t NumIndirectSymbols) {
-    // struct dysymtab_command (80 bytes)
-
-    uint64_t Start = OS.tell();
-    (void) Start;
-
-    Write32(macho::LCT_Dysymtab);
-    Write32(macho::DysymtabLoadCommandSize);
-    Write32(FirstLocalSymbol);
-    Write32(NumLocalSymbols);
-    Write32(FirstExternalSymbol);
-    Write32(NumExternalSymbols);
-    Write32(FirstUndefinedSymbol);
-    Write32(NumUndefinedSymbols);
-    Write32(0); // tocoff
-    Write32(0); // ntoc
-    Write32(0); // modtaboff
-    Write32(0); // nmodtab
-    Write32(0); // extrefsymoff
-    Write32(0); // nextrefsyms
-    Write32(IndirectSymbolOffset);
-    Write32(NumIndirectSymbols);
-    Write32(0); // extreloff
-    Write32(0); // nextrel
-    Write32(0); // locreloff
-    Write32(0); // nlocrel
-
-    assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
-  }
-
-  void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
-    MCSymbolData &Data = *MSD.SymbolData;
-    const MCSymbol &Symbol = Data.getSymbol();
-    uint8_t Type = 0;
-    uint16_t Flags = Data.getFlags();
-    uint32_t Address = 0;
+  // The Mach-O streamer uses the lowest 16 bits of the flags for the 'desc'
+  // value.
+  Write16(Flags);
+  if (is64Bit())
+    Write64(Address);
+  else
+    Write32(Address);
+}
 
-    // Set the N_TYPE bits. See <mach-o/nlist.h>.
-    //
-    // FIXME: Are the prebound or indirect fields possible here?
-    if (Symbol.isUndefined())
-      Type = macho::STT_Undefined;
-    else if (Symbol.isAbsolute())
-      Type = macho::STT_Absolute;
-    else
-      Type = macho::STT_Section;
-
-    // FIXME: Set STAB bits.
-
-    if (Data.isPrivateExtern())
-      Type |= macho::STF_PrivateExtern;
-
-    // Set external bit.
-    if (Data.isExternal() || Symbol.isUndefined())
-      Type |= macho::STF_External;
-
-    // Compute the symbol address.
-    if (Symbol.isDefined()) {
-      if (Symbol.isAbsolute()) {
-        Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
-      } else {
-        Address = getSymbolAddress(&Data, Layout);
-      }
-    } else if (Data.isCommon()) {
-      // Common symbols are encoded with the size in the address
-      // field, and their alignment in the flags.
-      Address = Data.getCommonSize();
-
-      // Common alignment is packed into the 'desc' bits.
-      if (unsigned Align = Data.getCommonAlignment()) {
-        unsigned Log2Size = Log2_32(Align);
-        assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
-        if (Log2Size > 15)
-          report_fatal_error("invalid 'common' alignment '" +
-                            Twine(Align) + "'");
-        // FIXME: Keep this mask with the SymbolFlags enumeration.
-        Flags = (Flags & 0xF0FF) | (Log2Size << 8);
-      }
-    }
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                        const MCAsmLayout &Layout,
+                                        const MCFragment *Fragment,
+                                        const MCFixup &Fixup,
+                                        MCValue Target,
+                                        uint64_t &FixedValue) {
+  TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
+                                       Target, FixedValue);
+}
 
-    // struct nlist (12 bytes)
+void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+  // This is the point where 'as' creates actual symbols for indirect symbols
+  // (in the following two passes). It would be easier for us to do this sooner
+  // when we see the attribute, but that makes getting the order in the symbol
+  // table much more complicated than it is worth.
+  //
+  // FIXME: Revisit this when the dust settles.
 
-    Write32(MSD.StringIndex);
-    Write8(Type);
-    Write8(MSD.SectionIndex);
+  // Bind non-lazy symbol pointers first.
+  unsigned IndirectIndex = 0;
+  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+         ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+    const MCSectionMachO &Section =
+      cast<MCSectionMachO>(it->SectionData->getSection());
 
-    // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
-    // value.
-    Write16(Flags);
-    if (is64Bit())
-      Write64(Address);
-    else
-      Write32(Address);
-  }
+    if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+      continue;
 
-  // FIXME: We really need to improve the relocation validation. Basically, we
-  // want to implement a separate computation which evaluates the relocation
-  // entry as the linker would, and verifies that the resultant fixup value is
-  // exactly what the encoder wanted. This will catch several classes of
-  // problems:
-  //
-  //  - Relocation entry bugs, the two algorithms are unlikely to have the same
-  //    exact bug.
-  //
-  //  - Relaxation issues, where we forget to relax something.
-  //
-  //  - Input errors, where something cannot be correctly encoded. 'as' allows
-  //    these through in many cases.
+    // Initialize the section indirect symbol base, if necessary.
+    if (!IndirectSymBase.count(it->SectionData))
+      IndirectSymBase[it->SectionData] = IndirectIndex;
 
-  static bool isFixupKindRIPRel(unsigned Kind) {
-    return Kind == X86::reloc_riprel_4byte ||
-      Kind == X86::reloc_riprel_4byte_movq_load;
+    Asm.getOrCreateSymbolData(*it->Symbol);
   }
-  void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                              const MCFragment *Fragment,
-                              const MCFixup &Fixup, MCValue Target,
-                              uint64_t &FixedValue) {
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-
-    // See <reloc.h>.
-    uint32_t FixupOffset =
-      Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
-    uint32_t FixupAddress =
-      getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
-    int64_t Value = 0;
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    Value = Target.getConstant();
-
-    if (IsPCRel) {
-      // Compensate for the relocation offset, Darwin x86_64 relocations only
-      // have the addend and appear to have attempted to define it to be the
-      // actual expression addend without the PCrel bias. However, instructions
-      // with data following the relocation are not accommodated for (see comment
-      // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
-      Value += 1LL << Log2Size;
-    }
 
-    if (Target.isAbsolute()) { // constant
-      // SymbolNum of 0 indicates the absolute section.
-      Type = macho::RIT_X86_64_Unsigned;
-      Index = 0;
-
-      // FIXME: I believe this is broken, I don't think the linker can
-      // understand it. I think it would require a local relocation, but I'm not
-      // sure if that would work either. The official way to get an absolute
-      // PCrel relocation is to use an absolute symbol (which we don't support
-      // yet).
-      if (IsPCRel) {
-        IsExtern = 1;
-        Type = macho::RIT_X86_64_Branch;
-      }
-    } else if (Target.getSymB()) { // A - B + constant
-      const MCSymbol *A = &Target.getSymA()->getSymbol();
-      MCSymbolData &A_SD = Asm.getSymbolData(*A);
-      const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
-
-      const MCSymbol *B = &Target.getSymB()->getSymbol();
-      MCSymbolData &B_SD = Asm.getSymbolData(*B);
-      const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
-
-      // Neither symbol can be modified.
-      if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
-          Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
-        report_fatal_error("unsupported relocation of modified symbol");
-
-      // We don't support PCrel relocations of differences. Darwin 'as' doesn't
-      // implement most of these correctly.
-      if (IsPCRel)
-        report_fatal_error("unsupported pc-relative relocation of difference");
-
-      // The support for the situation where one or both of the symbols would
-      // require a local relocation is handled just like if the symbols were
-      // external.  This is certainly used in the case of debug sections where
-      // the section has only temporary symbols and thus the symbols don't have
-      // base symbols.  This is encoded using the section ordinal and
-      // non-extern relocation entries.
-
-      // Darwin 'as' doesn't emit correct relocations for this (it ends up with
-      // a single SIGNED relocation); reject it for now.  Except the case where
-      // both symbols don't have a base, equal but both NULL.
-      if (A_Base == B_Base && A_Base)
-        report_fatal_error("unsupported relocation with identical base");
-
-      Value += getSymbolAddress(&A_SD, Layout) -
-        (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
-      Value -= getSymbolAddress(&B_SD, Layout) -
-        (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
-
-      if (A_Base) {
-        Index = A_Base->getIndex();
-        IsExtern = 1;
-      }
-      else {
-        Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
-        IsExtern = 0;
-      }
-      Type = macho::RIT_X86_64_Unsigned;
-
-      macho::RelocationEntry MRE;
-      MRE.Word0 = FixupOffset;
-      MRE.Word1 = ((Index     <<  0) |
-                   (IsPCRel   << 24) |
-                   (Log2Size  << 25) |
-                   (IsExtern  << 27) |
-                   (Type      << 28));
-      Relocations[Fragment->getParent()].push_back(MRE);
-
-      if (B_Base) {
-        Index = B_Base->getIndex();
-        IsExtern = 1;
-      }
-      else {
-        Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
-        IsExtern = 0;
-      }
-      Type = macho::RIT_X86_64_Subtractor;
-    } else {
-      const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-      MCSymbolData &SD = Asm.getSymbolData(*Symbol);
-      const MCSymbolData *Base = Asm.getAtom(&SD);
-
-      // Relocations inside debug sections always use local relocations when
-      // possible. This seems to be done because the debugger doesn't fully
-      // understand x86_64 relocation entries, and expects to find values that
-      // have already been fixed up.
-      if (Symbol->isInSection()) {
-        const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
-          Fragment->getParent()->getSection());
-        if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
-          Base = 0;
-      }
+  // Then lazy symbol pointers and symbol stubs.
+  IndirectIndex = 0;
+  for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+         ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+    const MCSectionMachO &Section =
+      cast<MCSectionMachO>(it->SectionData->getSection());
 
-      // x86_64 almost always uses external relocations, except when there is no
-      // symbol to use as a base address (a local symbol with no preceding
-      // non-local symbol).
-      if (Base) {
-        Index = Base->getIndex();
-        IsExtern = 1;
-
-        // Add the local offset, if needed.
-        if (Base != &SD)
-          Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
-      } else if (Symbol->isInSection() && !Symbol->isVariable()) {
-        // The index is the section ordinal (1-based).
-        Index = SD.getFragment()->getParent()->getOrdinal() + 1;
-        IsExtern = 0;
-        Value += getSymbolAddress(&SD, Layout);
-
-        if (IsPCRel)
-          Value -= FixupAddress + (1 << Log2Size);
-      } else if (Symbol->isVariable()) {
-        const MCExpr *Value = Symbol->getVariableValue();
-        int64_t Res;
-        bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
-        if (isAbs) {
-          FixedValue = Res;
-          return;
-        } else {
-          report_fatal_error("unsupported relocation of variable '" +
-                             Symbol->getName() + "'");
-        }
-      } else {
-        report_fatal_error("unsupported relocation of undefined symbol '" +
-                           Symbol->getName() + "'");
-      }
+    if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+        Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+      continue;
 
-      MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
-      if (IsPCRel) {
-        if (IsRIPRel) {
-          if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
-            // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
-            // rewrite the movq to an leaq at link time if the symbol ends up in
-            // the same linkage unit.
-            if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
-              Type = macho::RIT_X86_64_GOTLoad;
-            else
-              Type = macho::RIT_X86_64_GOT;
-          }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
-            Type = macho::RIT_X86_64_TLV;
-          }  else if (Modifier != MCSymbolRefExpr::VK_None) {
-            report_fatal_error("unsupported symbol modifier in relocation");
-          } else {
-            Type = macho::RIT_X86_64_Signed;
-
-            // The Darwin x86_64 relocation format has a problem where it cannot
-            // encode an address (L<foo> + <constant>) which is outside the atom
-            // containing L<foo>. Generally, this shouldn't occur but it does
-            // happen when we have a RIPrel instruction with data following the
-            // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
-            // adjustment Darwin x86_64 uses, the offset is still negative and
-            // the linker has no way to recognize this.
-            //
-            // To work around this, Darwin uses several special relocation types
-            // to indicate the offsets. However, the specification or
-            // implementation of these seems to also be incomplete; they should
-            // adjust the addend as well based on the actual encoded instruction
-            // (the additional bias), but instead appear to just look at the
-            // final offset.
-            switch (-(Target.getConstant() + (1LL << Log2Size))) {
-            case 1: Type = macho::RIT_X86_64_Signed1; break;
-            case 2: Type = macho::RIT_X86_64_Signed2; break;
-            case 4: Type = macho::RIT_X86_64_Signed4; break;
-            }
-          }
-        } else {
-          if (Modifier != MCSymbolRefExpr::VK_None)
-            report_fatal_error("unsupported symbol modifier in branch "
-                              "relocation");
-
-          Type = macho::RIT_X86_64_Branch;
-        }
-      } else {
-        if (Modifier == MCSymbolRefExpr::VK_GOT) {
-          Type = macho::RIT_X86_64_GOT;
-        } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
-          // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
-          // which case all we do is set the PCrel bit in the relocation entry;
-          // this is used with exception handling, for example. The source is
-          // required to include any necessary offset directly.
-          Type = macho::RIT_X86_64_GOT;
-          IsPCRel = 1;
-        } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
-          report_fatal_error("TLVP symbol modifier should have been rip-rel");
-        } else if (Modifier != MCSymbolRefExpr::VK_None)
-          report_fatal_error("unsupported symbol modifier in relocation");
-        else
-          Type = macho::RIT_X86_64_Unsigned;
-      }
-    }
+    // Initialize the section indirect symbol base, if necessary.
+    if (!IndirectSymBase.count(it->SectionData))
+      IndirectSymBase[it->SectionData] = IndirectIndex;
 
-    // x86_64 always writes custom values into the fixups.
-    FixedValue = Value;
-
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = FixupOffset;
-    MRE.Word1 = ((Index     <<  0) |
-                 (IsPCRel   << 24) |
-                 (Log2Size  << 25) |
-                 (IsExtern  << 27) |
-                 (Type      << 28));
-    Relocations[Fragment->getParent()].push_back(MRE);
+    // Set the symbol type to undefined lazy, but only on construction.
+    //
+    // FIXME: Do not hardcode.
+    bool Created;
+    MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+    if (Created)
+      Entry.setFlags(Entry.getFlags() | 0x0001);
   }
+}
 
-  void RecordScatteredRelocation(const MCAssembler &Asm,
-                                 const MCAsmLayout &Layout,
-                                 const MCFragment *Fragment,
-                                 const MCFixup &Fixup, MCValue Target,
-                                 unsigned Log2Size,
-                                 uint64_t &FixedValue) {
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Type = macho::RIT_Vanilla;
-
-    // See <reloc.h>.
-    const MCSymbol *A = &Target.getSymA()->getSymbol();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
-    if (!A_SD->getFragment())
-      report_fatal_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
-
-    uint32_t Value = getSymbolAddress(A_SD, Layout);
-    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
-    FixedValue += SecAddr;
-    uint32_t Value2 = 0;
-
-    if (const MCSymbolRefExpr *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
-      if (!B_SD->getFragment())
-        report_fatal_error("symbol '" + B->getSymbol().getName() +
-                          "' can not be undefined in a subtraction expression");
-
-      // Select the appropriate difference relocation type.
-      //
-      // Note that there is no longer any semantic difference between these two
-      // relocation types from the linkers point of view, this is done solely
-      // for pedantic compatibility with 'as'.
-      Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
-        (unsigned)macho::RIT_Generic_LocalDifference;
-      Value2 = getSymbolAddress(B_SD, Layout);
-      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
-    }
-
-    // Relocations are written out in reverse order, so the PAIR comes first.
-    if (Type == macho::RIT_Difference ||
-        Type == macho::RIT_Generic_LocalDifference) {
-      macho::RelocationEntry MRE;
-      MRE.Word0 = ((0         <<  0) |
-                   (macho::RIT_Pair  << 24) |
-                   (Log2Size  << 28) |
-                   (IsPCRel   << 30) |
-                   macho::RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocations[Fragment->getParent()].push_back(MRE);
+/// ComputeSymbolTable - Compute the symbol table data
+///
+/// \param StringTable [out] - The string table data.
+/// StringIndexMap (a local, not a parameter) maps symbol names to offsets in
+/// the string table.
+void MachObjectWriter::
+ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                   std::vector<MachSymbolData> &LocalSymbolData,
+                   std::vector<MachSymbolData> &ExternalSymbolData,
+                   std::vector<MachSymbolData> &UndefinedSymbolData) {
+  // Build section lookup table.
+  DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+  unsigned Index = 1;
+  for (MCAssembler::iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it, ++Index)
+    SectionIndexMap[&it->getSection()] = Index;
+  assert(Index <= 256 && "Too many sections!");
+
+  // Index 0 is always the empty string.
+  StringMap<uint64_t> StringIndexMap;
+  StringTable += '\x00';
+
+  // Build the symbol arrays and the string table, but only for non-local
+  // symbols.
+  //
+  // The particular order that we collect the symbols and create the string
+  // table, then sort the symbols is chosen to match 'as'. Even though it
+  // doesn't matter for correctness, this is important for letting us diff .o
+  // files.
+  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+         ie = Asm.symbol_end(); it != ie; ++it) {
+    const MCSymbol &Symbol = it->getSymbol();
+
+    // Ignore non-linker visible symbols.
+    if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+      continue;
+
+    if (!it->isExternal() && !Symbol.isUndefined())
+      continue;
+
+    uint64_t &Entry = StringIndexMap[Symbol.getName()];
+    if (!Entry) {
+      Entry = StringTable.size();
+      StringTable += Symbol.getName();
+      StringTable += '\x00';
     }
 
-    macho::RelocationEntry MRE;
-    MRE.Word0 = ((FixupOffset <<  0) |
-                 (Type        << 24) |
-                 (Log2Size    << 28) |
-                 (IsPCRel     << 30) |
-                 macho::RF_Scattered);
-    MRE.Word1 = Value;
-    Relocations[Fragment->getParent()].push_back(MRE);
-  }
-
-  void RecordARMScatteredRelocation(const MCAssembler &Asm,
-                                    const MCAsmLayout &Layout,
-                                    const MCFragment *Fragment,
-                                    const MCFixup &Fixup, MCValue Target,
-                                    unsigned Log2Size,
-                                    uint64_t &FixedValue) {
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Type = macho::RIT_Vanilla;
-
-    // See <reloc.h>.
-    const MCSymbol *A = &Target.getSymA()->getSymbol();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
-    if (!A_SD->getFragment())
-      report_fatal_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
-
-    uint32_t Value = getSymbolAddress(A_SD, Layout);
-    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
-    FixedValue += SecAddr;
-    uint32_t Value2 = 0;
-
-    if (const MCSymbolRefExpr *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
-      if (!B_SD->getFragment())
-        report_fatal_error("symbol '" + B->getSymbol().getName() +
-                          "' can not be undefined in a subtraction expression");
-
-      // Select the appropriate difference relocation type.
-      Type = macho::RIT_Difference;
-      Value2 = getSymbolAddress(B_SD, Layout);
-      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
-    }
+    MachSymbolData MSD;
+    MSD.SymbolData = it;
+    MSD.StringIndex = Entry;
 
-    // Relocations are written out in reverse order, so the PAIR comes first.
-    if (Type == macho::RIT_Difference ||
-        Type == macho::RIT_Generic_LocalDifference) {
-      macho::RelocationEntry MRE;
-      MRE.Word0 = ((0         <<  0) |
-                   (macho::RIT_Pair  << 24) |
-                   (Log2Size  << 28) |
-                   (IsPCRel   << 30) |
-                   macho::RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocations[Fragment->getParent()].push_back(MRE);
+    if (Symbol.isUndefined()) {
+      MSD.SectionIndex = 0;
+      UndefinedSymbolData.push_back(MSD);
+    } else if (Symbol.isAbsolute()) {
+      MSD.SectionIndex = 0;
+      ExternalSymbolData.push_back(MSD);
+    } else {
+      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+      assert(MSD.SectionIndex && "Invalid section index!");
+      ExternalSymbolData.push_back(MSD);
     }
-
-    macho::RelocationEntry MRE;
-    MRE.Word0 = ((FixupOffset <<  0) |
-                 (Type        << 24) |
-                 (Log2Size    << 28) |
-                 (IsPCRel     << 30) |
-                 macho::RF_Scattered);
-    MRE.Word1 = Value;
-    Relocations[Fragment->getParent()].push_back(MRE);
   }
 
-  void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
-                                   const MCAsmLayout &Layout,
-                                   const MCFragment *Fragment,
-                                   const MCFixup &Fixup, MCValue Target,
-                                   uint64_t &FixedValue) {
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Type = macho::RIT_ARM_Half;
-
-    // See <reloc.h>.
-    const MCSymbol *A = &Target.getSymA()->getSymbol();
-    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
-    if (!A_SD->getFragment())
-      report_fatal_error("symbol '" + A->getName() +
-                        "' can not be undefined in a subtraction expression");
-
-    uint32_t Value = getSymbolAddress(A_SD, Layout);
-    uint32_t Value2 = 0;
-    uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
-    FixedValue += SecAddr;
-
-    if (const MCSymbolRefExpr *B = Target.getSymB()) {
-      MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
-      if (!B_SD->getFragment())
-        report_fatal_error("symbol '" + B->getSymbol().getName() +
-                          "' can not be undefined in a subtraction expression");
-
-      // Select the appropriate difference relocation type.
-      Type = macho::RIT_ARM_HalfDifference;
-      Value2 = getSymbolAddress(B_SD, Layout);
-      FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
-    }
-
-    // Relocations are written out in reverse order, so the PAIR comes first.
-    // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
-    //
-    // For these two r_type relocations they always have a pair following them
-    // and the r_length bits are used differently.  The encoding of the
-    // r_length is as follows:
-    // low bit of r_length:
-    //  0 - :lower16: for movw instructions
-    //  1 - :upper16: for movt instructions
-    // high bit of r_length:
-    //  0 - arm instructions
-    //  1 - thumb instructions
-    // the other half of the relocated expression is in the following pair
-    // relocation entry in the the low 16 bits of r_address field.
-    unsigned ThumbBit = 0;
-    unsigned MovtBit = 0;
-    switch ((unsigned)Fixup.getKind()) {
-    default: break;
-    case ARM::fixup_arm_movt_hi16:
-    case ARM::fixup_arm_movt_hi16_pcrel:
-      MovtBit = 1;
-      break;
-    case ARM::fixup_t2_movt_hi16:
-    case ARM::fixup_t2_movt_hi16_pcrel:
-      MovtBit = 1;
-      // Fallthrough
-    case ARM::fixup_t2_movw_lo16:
-    case ARM::fixup_t2_movw_lo16_pcrel:
-      ThumbBit = 1;
-      break;
-    }
+  // Now add the data for local symbols.
+  for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+         ie = Asm.symbol_end(); it != ie; ++it) {
+    const MCSymbol &Symbol = it->getSymbol();
 
+    // Ignore non-linker visible symbols.
+    if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+      continue;
 
-    if (Type == macho::RIT_ARM_HalfDifference) {
-      uint32_t OtherHalf = MovtBit
-        ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+    if (it->isExternal() || Symbol.isUndefined())
+      continue;
 
-      macho::RelocationEntry MRE;
-      MRE.Word0 = ((OtherHalf       <<  0) |
-                   (macho::RIT_Pair << 24) |
-                   (MovtBit         << 28) |
-                   (ThumbBit        << 29) |
-                   (IsPCRel         << 30) |
-                   macho::RF_Scattered);
-      MRE.Word1 = Value2;
-      Relocations[Fragment->getParent()].push_back(MRE);
+    uint64_t &Entry = StringIndexMap[Symbol.getName()];
+    if (!Entry) {
+      Entry = StringTable.size();
+      StringTable += Symbol.getName();
+      StringTable += '\x00';
     }
 
-    macho::RelocationEntry MRE;
-    MRE.Word0 = ((FixupOffset <<  0) |
-                 (Type        << 24) |
-                 (MovtBit     << 28) |
-                 (ThumbBit    << 29) |
-                 (IsPCRel     << 30) |
-                 macho::RF_Scattered);
-    MRE.Word1 = Value;
-    Relocations[Fragment->getParent()].push_back(MRE);
-  }
+    MachSymbolData MSD;
+    MSD.SymbolData = it;
+    MSD.StringIndex = Entry;
 
-  void RecordTLVPRelocation(const MCAssembler &Asm,
-                            const MCAsmLayout &Layout,
-                            const MCFragment *Fragment,
-                            const MCFixup &Fixup, MCValue Target,
-                            uint64_t &FixedValue) {
-    assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
-           !is64Bit() &&
-           "Should only be called with a 32-bit TLVP relocation!");
-
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-    uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned IsPCRel = 0;
-
-    // Get the symbol data.
-    MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
-    unsigned Index = SD_A->getIndex();
-
-    // We're only going to have a second symbol in pic mode and it'll be a
-    // subtraction from the picbase. For 32-bit pic the addend is the difference
-    // between the picbase and the next address.  For 32-bit static the addend
-    // is zero.
-    if (Target.getSymB()) {
-      // If this is a subtraction then we're pcrel.
-      uint32_t FixupAddress =
-        getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
-      MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
-      IsPCRel = 1;
-      FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
-                    Target.getConstant());
-      FixedValue += 1ULL << Log2Size;
+    if (Symbol.isAbsolute()) {
+      MSD.SectionIndex = 0;
+      LocalSymbolData.push_back(MSD);
     } else {
-      FixedValue = 0;
+      MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+      assert(MSD.SectionIndex && "Invalid section index!");
+      LocalSymbolData.push_back(MSD);
     }
-
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = Value;
-    MRE.Word1 = ((Index                  <<  0) |
-                 (IsPCRel                << 24) |
-                 (Log2Size               << 25) |
-                 (1                      << 27) | // Extern
-                 (macho::RIT_Generic_TLV << 28)); // Type
-    Relocations[Fragment->getParent()].push_back(MRE);
   }
 
-  static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
-                                       unsigned &Log2Size) {
-    RelocType = unsigned(macho::RIT_Vanilla);
-    Log2Size = ~0U;
+  // External and undefined symbols are required to be in lexicographic order.
+  std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+  std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+  // Set the symbol indices.
+  Index = 0;
+  for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+    LocalSymbolData[i].SymbolData->setIndex(Index++);
+  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+    ExternalSymbolData[i].SymbolData->setIndex(Index++);
+  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+    UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+  // The string table is padded to a multiple of 4.
+  while (StringTable.size() % 4)
+    StringTable += '\x00';
+}
 
-    switch (Kind) {
-    default:
-      return false;
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+                                               const MCAsmLayout &Layout) {
+  uint64_t StartAddress = 0;
+  const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+  for (int i = 0, n = Order.size(); i != n ; ++i) {
+    const MCSectionData *SD = Order[i];
+    StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+    SectionAddress[SD] = StartAddress;
+    StartAddress += Layout.getSectionAddressSize(SD);
+
+    // Explicitly pad the section to match the alignment requirements of the
+    // following one. This is for 'gas' compatibility, it shouldn't
+    // strictly be necessary.
+    StartAddress += getPaddingSize(SD, Layout);
+  }
+}
 
-    case FK_Data_1:
-      Log2Size = llvm::Log2_32(1);
-      return true;
-    case FK_Data_2:
-      Log2Size = llvm::Log2_32(2);
-      return true;
-    case FK_Data_4:
-      Log2Size = llvm::Log2_32(4);
-      return true;
-    case FK_Data_8:
-      Log2Size = llvm::Log2_32(8);
-      return true;
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+                                                const MCAsmLayout &Layout) {
+  computeSectionAddresses(Asm, Layout);
 
-      // Handle 24-bit branch kinds.
-    case ARM::fixup_arm_ldst_pcrel_12:
-    case ARM::fixup_arm_pcrel_10:
-    case ARM::fixup_arm_adr_pcrel_12:
-    case ARM::fixup_arm_condbranch:
-    case ARM::fixup_arm_uncondbranch:
-      RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
-      return true;
+  // Create symbol data for any indirect symbols.
+  BindIndirectSymbols(Asm);
 
-      // Handle Thumb branches.
-    case ARM::fixup_arm_thumb_br:
-      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
-      Log2Size = llvm::Log2_32(2);
-      return true;
-      
-    case ARM::fixup_arm_thumb_bl:
-    case ARM::fixup_arm_thumb_blx:
-      RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
-      Log2Size = llvm::Log2_32(4);
-      return true;
+  // Compute symbol table information and bind symbol indices.
+  ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+                     UndefinedSymbolData);
+}
 
-    case ARM::fixup_arm_movt_hi16:
-    case ARM::fixup_arm_movt_hi16_pcrel:
-    case ARM::fixup_t2_movt_hi16:
-    case ARM::fixup_t2_movt_hi16_pcrel:
-      RelocType = unsigned(macho::RIT_ARM_HalfDifference);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
-      return true;
+bool MachObjectWriter::
+IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                       const MCSymbolData &DataA,
+                                       const MCFragment &FB,
+                                       bool InSet,
+                                       bool IsPCRel) const {
+  if (InSet)
+    return true;
 
-    case ARM::fixup_arm_movw_lo16:
-    case ARM::fixup_arm_movw_lo16_pcrel:
-    case ARM::fixup_t2_movw_lo16:
-    case ARM::fixup_t2_movw_lo16_pcrel:
-      RelocType = unsigned(macho::RIT_ARM_Half);
-      // Report as 'long', even though that is not quite accurate.
-      Log2Size = llvm::Log2_32(4);
+  // The effective address is
+  //     addr(atom(A)) + offset(A)
+  //   - addr(atom(B)) - offset(B)
+  // and the offsets are not relocatable, so the fixup is fully resolved when
+  //  addr(atom(A)) - addr(atom(B)) == 0.
+  const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+  const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+  const MCSection &SecA = SA.getSection();
+  const MCSection &SecB = FB.getParent()->getSection();
+
+  if (IsPCRel) {
+    // The simple (Darwin, except on x86_64) way of dealing with this was to
+    // assume that any reference to a temporary symbol *must* be a temporary
+    // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+    // relocation to a temporary symbol (in the same section) is fully
+    // resolved. This also works in conjunction with absolutized .set, which
+    // requires the compiler to use .set to absolutize the differences between
+    // symbols which the compiler knows to be assembly time constants, so we
+    // don't need to worry about considering symbol differences fully resolved.
+
+    if (!Asm.getBackend().hasReliableSymbolDifference()) {
+      if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+        return false;
       return true;
     }
+  } else {
+    if (!TargetObjectWriter->useAggressiveSymbolFolding())
+      return false;
   }
-  void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                           const MCFragment *Fragment, const MCFixup &Fixup,
-                           MCValue Target, uint64_t &FixedValue) {
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Log2Size;
-    unsigned RelocType = macho::RIT_Vanilla;
-    if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
-      report_fatal_error("unknown ARM fixup kind!");
-      return;
-    }
 
-    // If this is a difference or a defined symbol plus an offset, then we need
-    // a scattered relocation entry.  Differences always require scattered
-    // relocations.
-    if (Target.getSymB()) {
-      if (RelocType == macho::RIT_ARM_Half ||
-          RelocType == macho::RIT_ARM_HalfDifference)
-        return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
-                                           Target, FixedValue);
-      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                          Target, Log2Size, FixedValue);
-    }
+  const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
 
-    // Get the symbol data, if any.
-    MCSymbolData *SD = 0;
-    if (Target.getSymA())
-      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+  A_Base = FA.getAtom();
+  if (!A_Base)
+    return false;
 
-    // FIXME: For other platforms, we need to use scattered relocations for
-    // internal relocations with offsets.  If this is an internal relocation
-    // with an offset, it also needs a scattered relocation entry.
-    //
-    // Is this right for ARM?
-    uint32_t Offset = Target.getConstant();
-    if (IsPCRel && RelocType == macho::RIT_Vanilla)
-      Offset += 1 << Log2Size;
-    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
-      return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
-                                          Log2Size, FixedValue);
-
-    // See <reloc.h>.
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    if (Target.isAbsolute()) { // constant
-      // FIXME!
-      report_fatal_error("FIXME: relocations to absolute targets "
-                         "not yet implemented");
-    } else {
-      // Resolve constant variables.
-      if (SD->getSymbol().isVariable()) {
-        int64_t Res;
-        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
-              Res, Layout, SectionAddress)) {
-          FixedValue = Res;
-          return;
-        }
-      }
+  B_Base = FB.getAtom();
+  if (!B_Base)
+    return false;
 
-      // Check whether we need an external or internal relocation.
-      if (doesSymbolRequireExternRelocation(SD)) {
-        IsExtern = 1;
-        Index = SD->getIndex();
-        // For external relocations, make sure to offset the fixup value to
-        // compensate for the addend of the symbol address, if it was
-        // undefined. This occurs with weak definitions, for example.
-        if (!SD->Symbol->isUndefined())
-          FixedValue -= Layout.getSymbolOffset(SD);
-      } else {
-        // The index is the section ordinal (1-based).
-        const MCSectionData &SymSD = Asm.getSectionData(
-          SD->getSymbol().getSection());
-        Index = SymSD.getOrdinal() + 1;
-        FixedValue += getSectionAddress(&SymSD);
-      }
-      if (IsPCRel)
-        FixedValue -= getSectionAddress(Fragment->getParent());
+  // If the atoms are the same, they are guaranteed to have the same address.
+  if (A_Base == B_Base)
+    return true;
 
-      // The type is determined by the fixup kind.
-      Type = RelocType;
-    }
+  // Otherwise, we can't prove this is fully resolved.
+  return false;
+}
 
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = FixupOffset;
-    MRE.Word1 = ((Index     <<  0) |
-                 (IsPCRel   << 24) |
-                 (Log2Size  << 25) |
-                 (IsExtern  << 27) |
-                 (Type      << 28));
-    Relocations[Fragment->getParent()].push_back(MRE);
+void MachObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
+  unsigned NumSections = Asm.size();
+
+  // The section data starts after the header, the segment load command (and
+  // section headers) and the symbol table.
+  unsigned NumLoadCommands = 1;
+  uint64_t LoadCommandsSize = is64Bit() ?
+    macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+    macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+
+  // Add the symbol table load command sizes, if used.
+  unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+    UndefinedSymbolData.size();
+  if (NumSymbols) {
+    NumLoadCommands += 2;
+    LoadCommandsSize += (macho::SymtabLoadCommandSize +
+                         macho::DysymtabLoadCommandSize);
   }
 
-  void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                        const MCFragment *Fragment, const MCFixup &Fixup,
-                        MCValue Target, uint64_t &FixedValue) {
-    // FIXME: These needs to be factored into the target Mach-O writer.
-    if (isARM()) {
-      RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
-      return;
-    }
-    if (is64Bit()) {
-      RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
-      return;
-    }
-
-    unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
-    unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-
-    // If this is a 32-bit TLVP reloc it's handled a bit differently.
-    if (Target.getSymA() &&
-        Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
-      RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
-      return;
-    }
-
-    // If this is a difference or a defined symbol plus an offset, then we need
-    // a scattered relocation entry.
-    // Differences always require scattered relocations.
-    if (Target.getSymB())
-        return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                         Target, Log2Size, FixedValue);
-
-    // Get the symbol data, if any.
-    MCSymbolData *SD = 0;
-    if (Target.getSymA())
-      SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
-
-    // If this is an internal relocation with an offset, it also needs a
-    // scattered relocation entry.
-    uint32_t Offset = Target.getConstant();
-    if (IsPCRel)
-      Offset += 1 << Log2Size;
-    if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
-      return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
-                                       Target, Log2Size, FixedValue);
-
-    // See <reloc.h>.
-    uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
-    unsigned Index = 0;
-    unsigned IsExtern = 0;
-    unsigned Type = 0;
-
-    if (Target.isAbsolute()) { // constant
-      // SymbolNum of 0 indicates the absolute section.
-      //
-      // FIXME: Currently, these are never generated (see code below). I cannot
-      // find a case where they are actually emitted.
-      Type = macho::RIT_Vanilla;
-    } else {
-      // Resolve constant variables.
-      if (SD->getSymbol().isVariable()) {
-        int64_t Res;
-        if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
-              Res, Layout, SectionAddress)) {
-          FixedValue = Res;
-          return;
-        }
-      }
-
-      // Check whether we need an external or internal relocation.
-      if (doesSymbolRequireExternRelocation(SD)) {
-        IsExtern = 1;
-        Index = SD->getIndex();
-        // For external relocations, make sure to offset the fixup value to
-        // compensate for the addend of the symbol address, if it was
-        // undefined. This occurs with weak definitions, for example.
-        if (!SD->Symbol->isUndefined())
-          FixedValue -= Layout.getSymbolOffset(SD);
-      } else {
-        // The index is the section ordinal (1-based).
-        const MCSectionData &SymSD = Asm.getSectionData(
-          SD->getSymbol().getSection());
-        Index = SymSD.getOrdinal() + 1;
-        FixedValue += getSectionAddress(&SymSD);
-      }
-      if (IsPCRel)
-        FixedValue -= getSectionAddress(Fragment->getParent());
-
-      Type = macho::RIT_Vanilla;
-    }
-
-    // struct relocation_info (8 bytes)
-    macho::RelocationEntry MRE;
-    MRE.Word0 = FixupOffset;
-    MRE.Word1 = ((Index     <<  0) |
-                 (IsPCRel   << 24) |
-                 (Log2Size  << 25) |
-                 (IsExtern  << 27) |
-                 (Type      << 28));
-    Relocations[Fragment->getParent()].push_back(MRE);
+  // Compute the total size of the section data, as well as its file size and vm
+  // size.
+  uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+                               macho::Header32Size) + LoadCommandsSize;
+  uint64_t SectionDataSize = 0;
+  uint64_t SectionDataFileSize = 0;
+  uint64_t VMSize = 0;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    const MCSectionData &SD = *it;
+    uint64_t Address = getSectionAddress(&SD);
+    uint64_t Size = Layout.getSectionAddressSize(&SD);
+    uint64_t FileSize = Layout.getSectionFileSize(&SD);
+    FileSize += getPaddingSize(&SD, Layout);
+
+    VMSize = std::max(VMSize, Address + Size);
+
+    if (SD.getSection().isVirtualSection())
+      continue;
+
+    SectionDataSize = std::max(SectionDataSize, Address + Size);
+    SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
   }
 
-  void BindIndirectSymbols(MCAssembler &Asm) {
-    // This is the point where 'as' creates actual symbols for indirect symbols
-    // (in the following two passes). It would be easier for us to do this
-    // sooner when we see the attribute, but that makes getting the order in the
-    // symbol table much more complicated than it is worth.
-    //
-    // FIXME: Revisit this when the dust settles.
+  // The section data is padded to 4 bytes.
+  //
+  // FIXME: Is this machine dependent?
+  unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+  SectionDataFileSize += SectionDataPadding;
+
+  // Write the prolog, starting with the header and load command...
+  WriteHeader(NumLoadCommands, LoadCommandsSize,
+              Asm.getSubsectionsViaSymbols());
+  WriteSegmentLoadCommand(NumSections, VMSize,
+                          SectionDataStart, SectionDataSize);
+
+  // ... and then the section headers.
+  uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    unsigned NumRelocs = Relocs.size();
+    uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+    WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+    RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+  }
 
-    // Bind non lazy symbol pointers first.
-    unsigned IndirectIndex = 0;
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
-      const MCSectionMachO &Section =
-        cast<MCSectionMachO>(it->SectionData->getSection());
+  // Write the symbol table load command, if used.
+  if (NumSymbols) {
+    unsigned FirstLocalSymbol = 0;
+    unsigned NumLocalSymbols = LocalSymbolData.size();
+    unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+    unsigned NumExternalSymbols = ExternalSymbolData.size();
+    unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+    unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+    unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+    unsigned NumSymTabSymbols =
+      NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+    uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+    uint64_t IndirectSymbolOffset = 0;
+
+    // If used, the indirect symbols are written after the relocation entries.
+    if (NumIndirectSymbols)
+      IndirectSymbolOffset = RelocTableEnd;
+
+    // The symbol table is written after the indirect symbol data.
+    uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+    // The string table is written after the symbol table.
+    uint64_t StringTableOffset =
+      SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+                                              macho::Nlist32Size);
+    WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+                           StringTableOffset, StringTable.size());
+
+    WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+                             FirstExternalSymbol, NumExternalSymbols,
+                             FirstUndefinedSymbol, NumUndefinedSymbols,
+                             IndirectSymbolOffset, NumIndirectSymbols);
+  }
 
-      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
-        continue;
+  // Write the actual section data.
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    Asm.WriteSectionData(it, Layout);
 
-      // Initialize the section indirect symbol base, if necessary.
-      if (!IndirectSymBase.count(it->SectionData))
-        IndirectSymBase[it->SectionData] = IndirectIndex;
+    uint64_t Pad = getPaddingSize(it, Layout);
+    for (unsigned int i = 0; i < Pad; ++i)
+      Write8(0);
+  }
 
-      Asm.getOrCreateSymbolData(*it->Symbol);
-    }
+  // Write the extra padding.
+  WriteZeros(SectionDataPadding);
 
-    // Then lazy symbol pointers and symbol stubs.
-    IndirectIndex = 0;
-    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
-           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
-      const MCSectionMachO &Section =
-        cast<MCSectionMachO>(it->SectionData->getSection());
-
-      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
-          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
-        continue;
-
-      // Initialize the section indirect symbol base, if necessary.
-      if (!IndirectSymBase.count(it->SectionData))
-        IndirectSymBase[it->SectionData] = IndirectIndex;
-
-      // Set the symbol type to undefined lazy, but only on construction.
-      //
-      // FIXME: Do not hardcode.
-      bool Created;
-      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
-      if (Created)
-        Entry.setFlags(Entry.getFlags() | 0x0001);
+  // Write the relocation entries.
+  for (MCAssembler::const_iterator it = Asm.begin(),
+         ie = Asm.end(); it != ie; ++it) {
+    // Write the section relocation entries, in reverse order to match 'as'
+    // (approximately, the exact algorithm is more complicated than this).
+    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+      Write32(Relocs[e - i - 1].Word0);
+      Write32(Relocs[e - i - 1].Word1);
     }
   }
 
-  /// ComputeSymbolTable - Compute the symbol table data
-  ///
-  /// \param StringTable [out] - The string table data.
-  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
-  /// string table.
-  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
-                          std::vector<MachSymbolData> &LocalSymbolData,
-                          std::vector<MachSymbolData> &ExternalSymbolData,
-                          std::vector<MachSymbolData> &UndefinedSymbolData) {
-    // Build section lookup table.
-    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
-    unsigned Index = 1;
-    for (MCAssembler::iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it, ++Index)
-      SectionIndexMap[&it->getSection()] = Index;
-    assert(Index <= 256 && "Too many sections!");
-
-    // Index 0 is always the empty string.
-    StringMap<uint64_t> StringIndexMap;
-    StringTable += '\x00';
-
-    // Build the symbol arrays and the string table, but only for non-local
-    // symbols.
-    //
-    // The particular order that we collect the symbols and create the string
-    // table, then sort the symbols is chosen to match 'as'. Even though it
-    // doesn't matter for correctness, this is important for letting us diff .o
-    // files.
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it) {
-      const MCSymbol &Symbol = it->getSymbol();
-
-      // Ignore non-linker visible symbols.
-      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
-        continue;
-
-      if (!it->isExternal() && !Symbol.isUndefined())
-        continue;
-
-      uint64_t &Entry = StringIndexMap[Symbol.getName()];
-      if (!Entry) {
-        Entry = StringTable.size();
-        StringTable += Symbol.getName();
-        StringTable += '\x00';
+  // Write the symbol table data, if used.
+  if (NumSymbols) {
+    // Write the indirect symbol entries.
+    for (MCAssembler::const_indirect_symbol_iterator
+           it = Asm.indirect_symbol_begin(),
+           ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+      // Indirect symbols in the non lazy symbol pointer section have some
+      // special handling.
+      const MCSectionMachO &Section =
+        static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+      if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+        // If this symbol is defined and internal, mark it as such.
+        if (it->Symbol->isDefined() &&
+            !Asm.getSymbolData(*it->Symbol).isExternal()) {
+          uint32_t Flags = macho::ISF_Local;
+          if (it->Symbol->isAbsolute())
+            Flags |= macho::ISF_Absolute;
+          Write32(Flags);
+          continue;
+        }
       }
 
-      MachSymbolData MSD;
-      MSD.SymbolData = it;
-      MSD.StringIndex = Entry;
-
-      if (Symbol.isUndefined()) {
-        MSD.SectionIndex = 0;
-        UndefinedSymbolData.push_back(MSD);
-      } else if (Symbol.isAbsolute()) {
-        MSD.SectionIndex = 0;
-        ExternalSymbolData.push_back(MSD);
-      } else {
-        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-        assert(MSD.SectionIndex && "Invalid section index!");
-        ExternalSymbolData.push_back(MSD);
-      }
+      Write32(Asm.getSymbolData(*it->Symbol).getIndex());
     }
 
-    // Now add the data for local symbols.
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it) {
-      const MCSymbol &Symbol = it->getSymbol();
-
-      // Ignore non-linker visible symbols.
-      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
-        continue;
-
-      if (it->isExternal() || Symbol.isUndefined())
-        continue;
-
-      uint64_t &Entry = StringIndexMap[Symbol.getName()];
-      if (!Entry) {
-        Entry = StringTable.size();
-        StringTable += Symbol.getName();
-        StringTable += '\x00';
-      }
-
-      MachSymbolData MSD;
-      MSD.SymbolData = it;
-      MSD.StringIndex = Entry;
-
-      if (Symbol.isAbsolute()) {
-        MSD.SectionIndex = 0;
-        LocalSymbolData.push_back(MSD);
-      } else {
-        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
-        assert(MSD.SectionIndex && "Invalid section index!");
-        LocalSymbolData.push_back(MSD);
-      }
-    }
+    // FIXME: Check that offsets match computed ones.
 
-    // External and undefined symbols are required to be in lexicographic order.
-    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
-    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
-
-    // Set the symbol indices.
-    Index = 0;
+    // Write the symbol table entries.
     for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-      LocalSymbolData[i].SymbolData->setIndex(Index++);
+      WriteNlist(LocalSymbolData[i], Layout);
     for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-      ExternalSymbolData[i].SymbolData->setIndex(Index++);
+      WriteNlist(ExternalSymbolData[i], Layout);
     for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-      UndefinedSymbolData[i].SymbolData->setIndex(Index++);
-
-    // The string table is padded to a multiple of 4.
-    while (StringTable.size() % 4)
-      StringTable += '\x00';
-  }
-
-  void computeSectionAddresses(const MCAssembler &Asm,
-                               const MCAsmLayout &Layout) {
-    uint64_t StartAddress = 0;
-    const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
-    for (int i = 0, n = Order.size(); i != n ; ++i) {
-      const MCSectionData *SD = Order[i];
-      StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
-      SectionAddress[SD] = StartAddress;
-      StartAddress += Layout.getSectionAddressSize(SD);
-      // Explicitly pad the section to match the alignment requirements of the
-      // following one. This is for 'gas' compatibility, it shouldn't
-      /// strictly be necessary.
-      StartAddress += getPaddingSize(SD, Layout);
-    }
-  }
-
-  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
-    computeSectionAddresses(Asm, Layout);
-
-    // Create symbol data for any indirect symbols.
-    BindIndirectSymbols(Asm);
-
-    // Compute symbol table information and bind symbol indices.
-    ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
-                       UndefinedSymbolData);
-  }
-
-  virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
-                                                      const MCSymbolData &DataA,
-                                                      const MCFragment &FB,
-                                                      bool InSet,
-                                                      bool IsPCRel) const {
-    if (InSet)
-      return true;
+      WriteNlist(UndefinedSymbolData[i], Layout);
 
-    // The effective address is
-    //     addr(atom(A)) + offset(A)
-    //   - addr(atom(B)) - offset(B)
-    // and the offsets are not relocatable, so the fixup is fully resolved when
-    //  addr(atom(A)) - addr(atom(B)) == 0.
-    const MCSymbolData *A_Base = 0, *B_Base = 0;
-
-    const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
-    const MCSection &SecA = SA.getSection();
-    const MCSection &SecB = FB.getParent()->getSection();
-
-    if (IsPCRel) {
-      // The simple (Darwin, except on x86_64) way of dealing with this was to
-      // assume that any reference to a temporary symbol *must* be a temporary
-      // symbol in the same atom, unless the sections differ. Therefore, any
-      // PCrel relocation to a temporary symbol (in the same section) is fully
-      // resolved. This also works in conjunction with absolutized .set, which
-      // requires the compiler to use .set to absolutize the differences between
-      // symbols which the compiler knows to be assembly time constants, so we
-      // don't need to worry about considering symbol differences fully
-      // resolved.
-
-      if (!Asm.getBackend().hasReliableSymbolDifference()) {
-        if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
-          return false;
-        return true;
-      }
-    } else {
-      if (!TargetObjectWriter->useAggressiveSymbolFolding())
-        return false;
-    }
-
-    const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
-
-    A_Base = FA.getAtom();
-    if (!A_Base)
-      return false;
-
-    B_Base = FB.getAtom();
-    if (!B_Base)
-      return false;
-
-    // If the atoms are the same, they are guaranteed to have the same address.
-    if (A_Base == B_Base)
-      return true;
-
-    // Otherwise, we can't prove this is fully resolved.
-    return false;
+    // Write the string table.
+    OS << StringTable.str();
   }
-
-  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
-    unsigned NumSections = Asm.size();
-
-    // The section data starts after the header, the segment load command (and
-    // section headers) and the symbol table.
-    unsigned NumLoadCommands = 1;
-    uint64_t LoadCommandsSize = is64Bit() ?
-      macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
-      macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
-
-    // Add the symbol table load command sizes, if used.
-    unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
-      UndefinedSymbolData.size();
-    if (NumSymbols) {
-      NumLoadCommands += 2;
-      LoadCommandsSize += (macho::SymtabLoadCommandSize +
-                           macho::DysymtabLoadCommandSize);
-    }
-
-    // Compute the total size of the section data, as well as its file size and
-    // vm size.
-    uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
-                                 macho::Header32Size) + LoadCommandsSize;
-    uint64_t SectionDataSize = 0;
-    uint64_t SectionDataFileSize = 0;
-    uint64_t VMSize = 0;
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      const MCSectionData &SD = *it;
-      uint64_t Address = getSectionAddress(&SD);
-      uint64_t Size = Layout.getSectionAddressSize(&SD);
-      uint64_t FileSize = Layout.getSectionFileSize(&SD);
-      FileSize += getPaddingSize(&SD, Layout);
-
-      VMSize = std::max(VMSize, Address + Size);
-
-      if (SD.getSection().isVirtualSection())
-        continue;
-
-      SectionDataSize = std::max(SectionDataSize, Address + Size);
-      SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
-    }
-
-    // The section data is padded to 4 bytes.
-    //
-    // FIXME: Is this machine dependent?
-    unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
-    SectionDataFileSize += SectionDataPadding;
-
-    // Write the prolog, starting with the header and load command...
-    WriteHeader(NumLoadCommands, LoadCommandsSize,
-                Asm.getSubsectionsViaSymbols());
-    WriteSegmentLoadCommand(NumSections, VMSize,
-                            SectionDataStart, SectionDataSize);
-
-    // ... and then the section headers.
-    uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
-      unsigned NumRelocs = Relocs.size();
-      uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
-      WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
-      RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
-    }
-
-    // Write the symbol table load command, if used.
-    if (NumSymbols) {
-      unsigned FirstLocalSymbol = 0;
-      unsigned NumLocalSymbols = LocalSymbolData.size();
-      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
-      unsigned NumExternalSymbols = ExternalSymbolData.size();
-      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
-      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
-      unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
-      unsigned NumSymTabSymbols =
-        NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
-      uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
-      uint64_t IndirectSymbolOffset = 0;
-
-      // If used, the indirect symbols are written after the section data.
-      if (NumIndirectSymbols)
-        IndirectSymbolOffset = RelocTableEnd;
-
-      // The symbol table is written after the indirect symbol data.
-      uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
-
-      // The string table is written after symbol table.
-      uint64_t StringTableOffset =
-        SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
-                                                macho::Nlist32Size);
-      WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
-                             StringTableOffset, StringTable.size());
-
-      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
-                               FirstExternalSymbol, NumExternalSymbols,
-                               FirstUndefinedSymbol, NumUndefinedSymbols,
-                               IndirectSymbolOffset, NumIndirectSymbols);
-    }
-
-    // Write the actual section data.
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      Asm.WriteSectionData(it, Layout);
-
-      uint64_t Pad = getPaddingSize(it, Layout);
-      for (unsigned int i = 0; i < Pad; ++i)
-        Write8(0);
-    }
-
-    // Write the extra padding.
-    WriteZeros(SectionDataPadding);
-
-    // Write the relocation entries.
-    for (MCAssembler::const_iterator it = Asm.begin(),
-           ie = Asm.end(); it != ie; ++it) {
-      // Write the section relocation entries, in reverse order to match 'as'
-      // (approximately, the exact algorithm is more complicated than this).
-      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
-      for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-        Write32(Relocs[e - i - 1].Word0);
-        Write32(Relocs[e - i - 1].Word1);
-      }
-    }
-
-    // Write the symbol table data, if used.
-    if (NumSymbols) {
-      // Write the indirect symbol entries.
-      for (MCAssembler::const_indirect_symbol_iterator
-             it = Asm.indirect_symbol_begin(),
-             ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-        // Indirect symbols in the non lazy symbol pointer section have some
-        // special handling.
-        const MCSectionMachO &Section =
-          static_cast<const MCSectionMachO&>(it->SectionData->getSection());
-        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
-          // If this symbol is defined and internal, mark it as such.
-          if (it->Symbol->isDefined() &&
-              !Asm.getSymbolData(*it->Symbol).isExternal()) {
-            uint32_t Flags = macho::ISF_Local;
-            if (it->Symbol->isAbsolute())
-              Flags |= macho::ISF_Absolute;
-            Write32(Flags);
-            continue;
-          }
-        }
-
-        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
-      }
-
-      // FIXME: Check that offsets match computed ones.
-
-      // Write the symbol table entries.
-      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-        WriteNlist(LocalSymbolData[i], Layout);
-      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-        WriteNlist(ExternalSymbolData[i], Layout);
-      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-        WriteNlist(UndefinedSymbolData[i], Layout);
-
-      // Write the string table.
-      OS << StringTable.str();
-    }
-  }
-};
-
 }
 
 MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index e0a9de82983f..348cd4c9ab1b 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/StringExtras.h"
@@ -27,7 +27,7 @@ using namespace llvm;
 
 /// hasFlag - Determine if a feature has a flag; '+' or '-'
 ///
-static inline bool hasFlag(const std::string &Feature) {
+static inline bool hasFlag(const StringRef Feature) {
   assert(!Feature.empty() && "Empty string");
   // Get first character
   char Ch = Feature[0];
@@ -37,13 +37,13 @@ static inline bool hasFlag(const std::string &Feature) {
 
 /// StripFlag - Return string stripped of flag.
 ///
-static inline std::string StripFlag(const std::string &Feature) {
+static inline std::string StripFlag(const StringRef Feature) {
   return hasFlag(Feature) ? Feature.substr(1) : Feature;
 }
 
 /// isEnabled - Return true if enable flag; '+'.
 ///
-static inline bool isEnabled(const std::string &Feature) {
+static inline bool isEnabled(const StringRef Feature) {
   assert(!Feature.empty() && "Empty string");
   // Get first character
   char Ch = Feature[0];
@@ -53,16 +53,22 @@ static inline bool isEnabled(const std::string &Feature) {
 
 /// PrependFlag - Return a string with a prepended flag; '+' or '-'.
 ///
-static inline std::string PrependFlag(const std::string &Feature,
-                                      bool IsEnabled) {
+static inline std::string PrependFlag(const StringRef Feature,
+                                    bool IsEnabled) {
   assert(!Feature.empty() && "Empty string");
-  if (hasFlag(Feature)) return Feature;
-  return std::string(IsEnabled ? "+" : "-") + Feature;
+  if (hasFlag(Feature))
+    return Feature;
+  std::string Prefix = IsEnabled ? "+" : "-";
+  Prefix += Feature;
+  return Prefix;
 }
 
 /// Split - Splits a string of comma separated items in to a vector of strings.
 ///
-static void Split(std::vector<std::string> &V, const std::string &S) {
+static void Split(std::vector<std::string> &V, const StringRef S) {
+  if (S.empty())
+    return;
+
   // Start at beginning of string.
   size_t Pos = 0;
   while (true) {
@@ -88,7 +94,7 @@ static std::string Join(const std::vector<std::string> &V) {
   std::string Result;
   // If the vector is not empty 
   if (!V.empty()) {
-    // Start with the CPU feature
+    // Start with the first feature
     Result = V[0];
     // For each successive feature
     for (size_t i = 1; i < V.size(); i++) {
@@ -103,7 +109,7 @@ static std::string Join(const std::vector<std::string> &V) {
 }
 
 /// Adding features.
-void SubtargetFeatures::AddFeature(const std::string &String,
+void SubtargetFeatures::AddFeature(const StringRef String,
                                    bool IsEnabled) {
   // Don't add empty features
   if (!String.empty()) {
@@ -113,16 +119,16 @@ void SubtargetFeatures::AddFeature(const std::string &String,
 }
 
 /// Find KV in array using binary search.
-template<typename T> const T *Find(const std::string &S, const T *A, size_t L) {
+template<typename T> const T *Find(const StringRef S, const T *A, size_t L) {
   // Make the lower bound element we're looking for
   T KV;
-  KV.Key = S.c_str();
+  KV.Key = S.data();
   // Determine the end of the array
   const T *Hi = A + L;
   // Binary search the array
   const T *F = std::lower_bound(A, Hi, KV);
   // If not found then return NULL
-  if (F == Hi || std::string(F->Key) != S) return NULL;
+  if (F == Hi || StringRef(F->Key) != S) return NULL;
   // Return the found array item
   return F;
 }
@@ -170,7 +176,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
 //                    SubtargetFeatures Implementation
 //===----------------------------------------------------------------------===//
 
-SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
+SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
   // Break up string into separate features
   Split(Features, Initial);
 }
@@ -179,33 +185,6 @@ SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
 std::string SubtargetFeatures::getString() const {
   return Join(Features);
 }
-void SubtargetFeatures::setString(const std::string &Initial) {
-  // Throw out old features
-  Features.clear();
-  // Break up string into separate features
-  Split(Features, LowercaseString(Initial));
-}
-
-
-/// setCPU - Set the CPU string.  Replaces previous setting.  Setting to ""
-/// clears CPU.
-void SubtargetFeatures::setCPU(const std::string &String) {
-  Features[0] = LowercaseString(String);
-}
-
-
-/// setCPUIfNone - Setting CPU string only if no string is set.
-///
-void SubtargetFeatures::setCPUIfNone(const std::string &String) {
-  if (Features[0].empty()) setCPU(String);
-}
-
-/// getCPU - Returns current CPU.
-///
-const std::string & SubtargetFeatures::getCPU() const {
-  return Features[0];
-}
-
 
 /// SetImpliedBits - For each feature that is (transitively) implied by this
 /// feature, set it.
@@ -245,14 +224,48 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
   }
 }
 
-/// getBits - Get feature bits.
+/// ToggleFeature - Toggle a feature in Bits and return the updated bits. An
+/// unrecognized feature is reported on errs() and Bits is returned unchanged.
+uint64_t
+SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+                                 const SubtargetFeatureKV *FeatureTable,
+                                 size_t FeatureTableSize) {
+  // Look up the feature, minus any leading '+'/'-' flag, in the table.
+  const SubtargetFeatureKV *FeatureEntry =
+    Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+  // If there is a match, flip the feature's bits.
+  if (FeatureEntry) {
+    if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) { // Fully set.
+      Bits &= ~FeatureEntry->Value;
+
+      // For each feature that implies this, clear it.
+      ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+    } else {
+      Bits |=  FeatureEntry->Value;
+
+      // For each feature that this implies, set it.
+      SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+    }
+  } else {
+    errs() << "'" << Feature
+           << "' is not a recognized feature for this target"
+           << " (ignoring feature)\n";
+  }
+
+  return Bits;
+}
+           
+
+/// getFeatureBits - Get feature bits of a CPU.
 ///
-uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
-                                          size_t CPUTableSize,
-                                    const SubtargetFeatureKV *FeatureTable,
-                                          size_t FeatureTableSize) {
-  assert(CPUTable && "missing CPU table");
-  assert(FeatureTable && "missing features table");
+uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
+                                         const SubtargetFeatureKV *CPUTable,
+                                         size_t CPUTableSize,
+                                         const SubtargetFeatureKV *FeatureTable,
+                                         size_t FeatureTableSize) {
+  if (!FeatureTableSize || !CPUTableSize)
+    return 0;
+
 #ifndef NDEBUG
   for (size_t i = 1; i < CPUTableSize; i++) {
     assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
@@ -266,31 +279,33 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
   uint64_t Bits = 0;                    // Resulting bits
 
   // Check if help is needed
-  if (Features[0] == "help")
+  if (CPU == "help")
     Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
   
-  // Find CPU entry
-  const SubtargetFeatureKV *CPUEntry =
-                            Find(Features[0], CPUTable, CPUTableSize);
-  // If there is a match
-  if (CPUEntry) {
-    // Set base feature bits
-    Bits = CPUEntry->Value;
-
-    // Set the feature implied by this CPU feature, if any.
-    for (size_t i = 0; i < FeatureTableSize; ++i) {
-      const SubtargetFeatureKV &FE = FeatureTable[i];
-      if (CPUEntry->Value & FE.Value)
-        SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+  // Find CPU entry if CPU name is specified.
+  if (!CPU.empty()) {
+    const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+    // If there is a match
+    if (CPUEntry) {
+      // Set base feature bits
+      Bits = CPUEntry->Value;
+
+      // Set the feature implied by this CPU feature, if any.
+      for (size_t i = 0; i < FeatureTableSize; ++i) {
+        const SubtargetFeatureKV &FE = FeatureTable[i];
+        if (CPUEntry->Value & FE.Value)
+          SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+      }
+    } else {
+      errs() << "'" << CPU
+             << "' is not a recognized processor for this target"
+             << " (ignoring processor)\n";
     }
-  } else {
-    errs() << "'" << Features[0]
-           << "' is not a recognized processor for this target"
-           << " (ignoring processor)\n";
   }
+
   // Iterate through each feature
-  for (size_t i = 1; i < Features.size(); i++) {
-    const std::string &Feature = Features[i];
+  for (size_t i = 0, E = Features.size(); i < E; i++) {
+    const StringRef Feature = Features[i];
     
     // Check for help
     if (Feature == "+help")
@@ -323,9 +338,10 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
   return Bits;
 }
 
-/// Get info pointer
-void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
-                                       size_t TableSize) {
+/// Get scheduling itinerary of a CPU.
+void *SubtargetFeatures::getItinerary(const StringRef CPU,
+                                      const SubtargetInfoKV *Table,
+                                      size_t TableSize) {
   assert(Table && "missing table");
 #ifndef NDEBUG
   for (size_t i = 1; i < TableSize; i++) {
@@ -334,12 +350,12 @@ void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
 #endif
 
   // Find entry
-  const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize);
+  const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize);
   
   if (Entry) {
     return Entry->Value;
   } else {
-    errs() << "'" << Features[0]
+    errs() << "'" << CPU
            << "' is not a recognized processor for this target"
            << " (ignoring processor)\n";
     return NULL;
@@ -367,10 +383,7 @@ void SubtargetFeatures::dump() const {
 /// subtarget. It would be better if we could encode this information
 /// into the IR. See <rdar://5972456>.
 ///
-void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU,
-                                                    const Triple& Triple) {
-  setCPU(CPU);
-
+void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
   if (Triple.getVendor() == Triple::Apple) {
     if (Triple.getArch() == Triple::ppc) {
       // powerpc-apple-*
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
new file mode 100644
index 000000000000..4b31c7557dd3
--- /dev/null
+++ b/lib/Object/Binary.cpp
@@ -0,0 +1,96 @@
+//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Binary class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Binary.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+// Include headers for createBinary.
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/COFF.h"
+
+using namespace llvm;
+using namespace object;
+
+Binary::~Binary() {
+  delete Data; // Frees the MemoryBuffer handed to the constructor.
+}
+
+Binary::Binary(unsigned int Type, MemoryBuffer *Source)
+  : TypeID(Type)
+  , Data(Source) {} // Source is deleted in ~Binary().
+
+StringRef Binary::getData() const {
+  return Data->getBuffer(); // Contents of the underlying MemoryBuffer.
+}
+
+StringRef Binary::getFileName() const {
+  return Data->getBufferIdentifier(); // Name is the buffer's identifier.
+}
+
+error_code object::createBinary(MemoryBuffer *Source,
+                                OwningPtr<Binary> &Result) {
+  OwningPtr<MemoryBuffer> scopedSource(Source); // Owns Source until take().
+  if (!Source)
+    return make_error_code(errc::invalid_argument);
+  if (Source->getBufferSize() < 64) // Buffers under 64 bytes are rejected.
+    return object_error::invalid_file_type;
+  sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(),
+                                static_cast<unsigned>(Source->getBufferSize()));
+  error_code ec; // Passed to the COFFObjectFile constructor below.
+  switch (type) {
+    case sys::ELF_Relocatable_FileType:
+    case sys::ELF_Executable_FileType:
+    case sys::ELF_SharedObject_FileType:
+    case sys::ELF_Core_FileType: {
+      OwningPtr<Binary> ret(
+        ObjectFile::createELFObjectFile(scopedSource.take()));
+      if (!ret) // NOTE(review): buffer was take()n; who frees it here?
+        return object_error::invalid_file_type;
+      Result.swap(ret); // Result is only modified on success.
+      return object_error::success;
+    }
+    case sys::Mach_O_Object_FileType:
+    case sys::Mach_O_Executable_FileType:
+    case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+    case sys::Mach_O_Core_FileType:
+    case sys::Mach_O_PreloadExecutable_FileType:
+    case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+    case sys::Mach_O_DynamicLinker_FileType:
+    case sys::Mach_O_Bundle_FileType:
+    case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: {
+      OwningPtr<Binary> ret(
+        ObjectFile::createMachOObjectFile(scopedSource.take()));
+      if (!ret) // NOTE(review): buffer was take()n; who frees it here?
+        return object_error::invalid_file_type;
+      Result.swap(ret);
+      return object_error::success;
+    }
+    case sys::COFF_FileType: {
+      OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec));
+      if (ec) return ec; // ret goes out of scope and deletes the object.
+      Result.swap(ret);
+      return object_error::success;
+    }
+    default: // Unrecognized object file format.
+      return object_error::invalid_file_type;
+  }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFile(Path, File)) // Read the file.
+    return ec;
+  return createBinary(File.take(), Result); // Buffer overload takes over.
+}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 642a8ece8b76..68e5e94924d0 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,8 +1,10 @@
 add_llvm_library(LLVMObject
+  Binary.cpp
+  COFFObjectFile.cpp
+  ELFObjectFile.cpp
+  Error.cpp
   MachOObject.cpp
   MachOObjectFile.cpp
   Object.cpp
   ObjectFile.cpp
-  COFFObjectFile.cpp
-  ELFObjectFile.cpp
   )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 86bf44baaeb6..07de6bc99973 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -11,11 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/COFF.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Endian.h"
 
 using namespace llvm;
 using namespace object;
@@ -28,174 +26,164 @@ using support::little16_t;
 }
 
 namespace {
-struct coff_file_header {
-  ulittle16_t Machine;
-  ulittle16_t NumberOfSections;
-  ulittle32_t TimeDateStamp;
-  ulittle32_t PointerToSymbolTable;
-  ulittle32_t NumberOfSymbols;
-  ulittle16_t SizeOfOptionalHeader;
-  ulittle16_t Characteristics;
-};
+// Returns false and sets ec to unexpected_eof if size exceeds m's buffer size.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+  if (m->getBufferSize() < size) {
+    ec = object_error::unexpected_eof;
+    return false;
+  }
+  return true; // ec is left untouched on success.
 }
 
-extern char coff_file_header_layout_static_assert
-            [sizeof(coff_file_header) == 20 ? 1 : -1];
-
-namespace {
-struct coff_symbol {
-  struct StringTableOffset {
-    ulittle32_t Zeroes;
-    ulittle32_t Offset;
-  };
-
-  union {
-    char ShortName[8];
-    StringTableOffset Offset;
-  } Name;
-
-  ulittle32_t Value;
-  little16_t SectionNumber;
-
-  struct {
-    ulittle8_t BaseType;
-    ulittle8_t ComplexType;
-  } Type;
-
-  ulittle8_t  StorageClass;
-  ulittle8_t  NumberOfAuxSymbols;
-};
+// Returns false (setting ec) if [addr, addr + size) runs past the end of m.
+bool checkAddr(const MemoryBuffer *m,
+               error_code &ec,
+               uintptr_t addr, // NOTE(review): no lower-bound check.
+               uint64_t size) {
+  if (addr + size < addr || // Sum wrapped around.
+      addr + size < size ||
+      addr + size > uintptr_t(m->getBufferEnd())) { // Past the buffer end.
+    ec = object_error::unexpected_eof;
+    return false;
+  }
+  return true; // ec is left untouched on success.
+}
 }
 
-extern char coff_coff_symbol_layout_static_assert
-            [sizeof(coff_symbol) == 18 ? 1 : -1];
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+  const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
 
-namespace {
-struct coff_section {
-  char Name[8];
-  ulittle32_t VirtualSize;
-  ulittle32_t VirtualAddress;
-  ulittle32_t SizeOfRawData;
-  ulittle32_t PointerToRawData;
-  ulittle32_t PointerToRelocations;
-  ulittle32_t PointerToLinenumbers;
-  ulittle16_t NumberOfRelocations;
-  ulittle16_t NumberOfLinenumbers;
-  ulittle32_t Characteristics;
-};
+# ifndef NDEBUG
+  // Verify that the symbol points to a valid entry in the symbol table.
+  uintptr_t offset = uintptr_t(addr) - uintptr_t(base());
+  if (offset < Header->PointerToSymbolTable
+      || offset >= Header->PointerToSymbolTable
+         + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+    report_fatal_error("Symbol was outside of symbol table.");
+
+  assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+         == 0 && "Symbol did not point to the beginning of a symbol");
+# endif
+
+  return addr;
 }
 
-extern char coff_coff_section_layout_static_assert
-            [sizeof(coff_section) == 40 ? 1 : -1];
+const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const {
+  const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p);
 
-namespace {
-class COFFObjectFile : public ObjectFile {
-private:
-        uint64_t         HeaderOff;
-  const coff_file_header *Header;
-  const coff_section     *SectionTable;
-  const coff_symbol      *SymbolTable;
-  const char             *StringTable;
-
-  const coff_section     *getSection(std::size_t index) const;
-  const char             *getString(std::size_t offset) const;
-
-protected:
-  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
-  virtual StringRef getSymbolName(DataRefImpl Symb) const;
-  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const;
-  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const;
-  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const;
-  virtual bool      isSymbolInternal(DataRefImpl Symb) const;
-
-  virtual SectionRef getSectionNext(DataRefImpl Sec) const;
-  virtual StringRef  getSectionName(DataRefImpl Sec) const;
-  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const;
-  virtual uint64_t   getSectionSize(DataRefImpl Sec) const;
-  virtual StringRef  getSectionContents(DataRefImpl Sec) const;
-  virtual bool       isSectionText(DataRefImpl Sec) const;
-
-public:
-  COFFObjectFile(MemoryBuffer *Object);
-  virtual symbol_iterator begin_symbols() const;
-  virtual symbol_iterator end_symbols() const;
-  virtual section_iterator begin_sections() const;
-  virtual section_iterator end_sections() const;
-
-  virtual uint8_t getBytesInAddress() const;
-  virtual StringRef getFileFormatName() const;
-  virtual unsigned getArch() const;
-};
-} // end namespace
-
-SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const {
-  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+# ifndef NDEBUG
+  // Verify that the section points to a valid entry in the section table.
+  if (addr < SectionTable
+      || addr >= (SectionTable + Header->NumberOfSections))
+    report_fatal_error("Section was outside of section table.");
+
+  uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable);
+  assert(offset % sizeof(coff_section) == 0 &&
+         "Section did not point to the beginning of a section");
+# endif
+
+  return addr;
+}
+
+error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
+                                         SymbolRef &Result) const {
+  const coff_symbol *symb = toSymb(Symb);
   symb += 1 + symb->NumberOfAuxSymbols;
-  Symb.p = reinterpret_cast<intptr_t>(symb);
-  return SymbolRef(Symb, this);
+  Symb.p = reinterpret_cast<uintptr_t>(symb);
+  Result = SymbolRef(Symb, this);
+  return object_error::success;
 }
 
-StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const {
-  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+ error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
+                                          StringRef &Result) const {
+  const coff_symbol *symb = toSymb(Symb);
   // Check for string table entry. First 4 bytes are 0.
   if (symb->Name.Offset.Zeroes == 0) {
     uint32_t Offset = symb->Name.Offset.Offset;
-    return StringRef(getString(Offset));
+    if (error_code ec = getString(Offset, Result))
+      return ec;
+    return object_error::success;
   }
 
   if (symb->Name.ShortName[7] == 0)
     // Null terminated, let ::strlen figure out the length.
-    return StringRef(symb->Name.ShortName);
-  // Not null terminated, use all 8 bytes.
-  return StringRef(symb->Name.ShortName, 8);
+    Result = StringRef(symb->Name.ShortName);
+  else
+    // Not null terminated, use all 8 bytes.
+    Result = StringRef(symb->Name.ShortName, 8);
+  return object_error::success;
 }
 
-uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
-  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
-  const coff_section *Section = getSection(symb->SectionNumber);
-  char Type = getSymbolNMTypeChar(Symb);
+error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+                                            uint64_t &Result) const {
+  const coff_symbol *symb = toSymb(Symb);
+  const coff_section *Section = NULL;
+  if (error_code ec = getSection(symb->SectionNumber, Section))
+    return ec;
+  char Type;
+  if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+    return ec;
   if (Type == 'U' || Type == 'w')
-    return UnknownAddressOrSize;
-  if (Section)
-    return Section->VirtualAddress + symb->Value;
-  return symb->Value;
+    Result = UnknownAddressOrSize;
+  else if (Section)
+    Result = Section->VirtualAddress + symb->Value;
+  else
+    Result = symb->Value;
+  return object_error::success;
 }
 
-uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
+                                         uint64_t &Result) const {
   // FIXME: Return the correct size. This requires looking at all the symbols
   //        in the same section as this symbol, and looking for either the next
   //        symbol, or the end of the section.
-  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
-  const coff_section *Section = getSection(symb->SectionNumber);
-  char Type = getSymbolNMTypeChar(Symb);
+  const coff_symbol *symb = toSymb(Symb);
+  const coff_section *Section = NULL;
+  if (error_code ec = getSection(symb->SectionNumber, Section))
+    return ec;
+  char Type;
+  if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+    return ec;
   if (Type == 'U' || Type == 'w')
-    return UnknownAddressOrSize;
-  if (Section)
-    return Section->SizeOfRawData - symb->Value;
-  return 0;
+    Result = UnknownAddressOrSize;
+  else if (Section)
+    Result = Section->SizeOfRawData - symb->Value;
+  else
+    Result = 0;
+  return object_error::success;
 }
 
-char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
-  const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
-  char ret = StringSwitch<char>(getSymbolName(Symb))
+error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+                                               char &Result) const {
+  const coff_symbol *symb = toSymb(Symb);
+  StringRef name;
+  if (error_code ec = getSymbolName(Symb, name))
+    return ec;
+  char ret = StringSwitch<char>(name)
     .StartsWith(".debug", 'N')
     .StartsWith(".sxdata", 'N')
     .Default('?');
 
-  if (ret != '?')
-    return ret;
+  if (ret != '?') {
+    Result = ret;
+    return object_error::success;
+  }
 
   uint32_t Characteristics = 0;
-  if (const coff_section *Section = getSection(symb->SectionNumber)) {
+  if (symb->SectionNumber > 0) {
+    const coff_section *Section = NULL;
+    if (error_code ec = getSection(symb->SectionNumber, Section))
+      return ec;
     Characteristics = Section->Characteristics;
   }
 
   switch (symb->SectionNumber) {
   case COFF::IMAGE_SYM_UNDEFINED:
     // Check storage classes.
-    if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
-      return 'w'; // Don't do ::toupper.
-    else
+    if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+      Result = 'w';
+      return object_error::success; // Don't do ::toupper.
+    } else
       ret = 'u';
     break;
   case COFF::IMAGE_SYM_ABSOLUTE:
@@ -227,22 +215,28 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
   if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
     ret = ::toupper(ret);
 
-  return ret;
+  Result = ret;
+  return object_error::success;
 }
 
-bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const {
-  return false;
+error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb,
+                                            bool &Result) const {
+  Result = false;
+  return object_error::success;
 }
 
-SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const {
-  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
+                                          SectionRef &Result) const {
+  const coff_section *sec = toSec(Sec);
   sec += 1;
-  Sec.p = reinterpret_cast<intptr_t>(sec);
-  return SectionRef(Sec, this);
+  Sec.p = reinterpret_cast<uintptr_t>(sec);
+  Result = SectionRef(Sec, this);
+  return object_error::success;
 }
 
-StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
-  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
+                                          StringRef &Result) const {
+  const coff_section *sec = toSec(Sec);
   StringRef name;
   if (sec->Name[7] == 0)
     // Null terminated, let ::strlen figure out the length.
@@ -255,64 +249,124 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
   if (name[0] == '/') {
     uint32_t Offset;
     name.substr(1).getAsInteger(10, Offset);
-    return StringRef(getString(Offset));
+    if (error_code ec = getString(Offset, name))
+      return ec;
   }
 
-  // It's just a normal name.
-  return name;
+  Result = name;
+  return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
+                                             uint64_t &Result) const {
+  const coff_section *sec = toSec(Sec);
+  Result = sec->VirtualAddress;
+  return object_error::success;
 }
 
-uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
-  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
-  return sec->VirtualAddress;
+error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
+                                          uint64_t &Result) const {
+  const coff_section *sec = toSec(Sec);
+  Result = sec->SizeOfRawData;
+  return object_error::success;
 }
 
-uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const {
-  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
-  return sec->SizeOfRawData;
+error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
+                                              StringRef &Result) const {
+  const coff_section *sec = toSec(Sec);
+  // Verify only that the contents lie within the file bounds; a section that
+  // ends exactly at the end of the buffer is valid. Overlap with other data
+  // is not checked, as there's nothing that says that is not allowed.
+  uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
+  uintptr_t con_end = con_start + sec->SizeOfRawData;
+  if (con_end > uintptr_t(Data->getBufferEnd()))
+    return object_error::parse_failed;
+  Result = StringRef(reinterpret_cast<const char*>(con_start),
+                     sec->SizeOfRawData);
+  return object_error::success;
 }
 
-StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const {
-  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
-  return StringRef(reinterpret_cast<const char *>(base + sec->PointerToRawData),
-                   sec->SizeOfRawData);
+error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
+                                         bool &Result) const {
+  const coff_section *sec = toSec(Sec);
+  Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+  return object_error::success;
 }
 
-bool COFFObjectFile::isSectionText(DataRefImpl Sec) const {
-  const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
-  return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+                                                 DataRefImpl Symb,
+                                                 bool &Result) const {
+  // FIXME: Unimplemented.
+  Result = false;
+  return object_error::success;
 }
 
-COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
-  : ObjectFile(Object) {
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
+  : ObjectFile(Binary::isCOFF, Object, ec) {
+  // Check that we at least have enough room for a header.
+  if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
 
-  HeaderOff = 0;
+  // The actual starting location of the COFF header in the file. This can be
+  // non-zero in PE/COFF files.
+  uint64_t HeaderStart = 0;
 
-  if (base[0] == 0x4d && base[1] == 0x5a) {
+  // Check if this is a PE/COFF file.
+  if (base()[0] == 0x4d && base()[1] == 0x5a) {
     // PE/COFF, seek through MS-DOS compatibility stub and 4-byte
     // PE signature to find 'normal' COFF header.
-    HeaderOff += *reinterpret_cast<const ulittle32_t *>(base + 0x3c);
-    HeaderOff += 4;
+    if (!checkSize(Data, ec, 0x3c + 8)) return;
+    HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+    if (!checkAddr(Data, ec, uintptr_t(base() + HeaderStart), 4)) return;
+    if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) { // PE magic
+      ec = object_error::parse_failed;
+      return;
+    }
+    HeaderStart += 4; // Skip the PE Header.
   }
 
-  Header = reinterpret_cast<const coff_file_header *>(base + HeaderOff);
+  Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
+  if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
+    return;
+
   SectionTable =
-    reinterpret_cast<const coff_section *>( base
-                                          + HeaderOff
+    reinterpret_cast<const coff_section *>( base()
+                                          + HeaderStart
                                           + sizeof(coff_file_header)
                                           + Header->SizeOfOptionalHeader);
+  if (!checkAddr(Data, ec, uintptr_t(SectionTable),
+                 Header->NumberOfSections * sizeof(coff_section)))
+    return;
+
   SymbolTable =
-    reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable);
+    reinterpret_cast<const coff_symbol *>(base()
+                                          + Header->PointerToSymbolTable);
+  if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+                 Header->NumberOfSymbols * sizeof(coff_symbol)))
+    return;
 
   // Find string table.
-  StringTable = reinterpret_cast<const char *>(base)
-              + Header->PointerToSymbolTable
-              + Header->NumberOfSymbols * 18;
+  StringTable = reinterpret_cast<const char *>(base())
+                + Header->PointerToSymbolTable
+                + Header->NumberOfSymbols * sizeof(coff_symbol);
+  if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+    return;
+
+  StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+  if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+    return;
+  // Check that the string table is null terminated if it has any strings.
+  if (StringTableSize < 4
+      || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+    ec = object_error::parse_failed;
+    return;
+  }
+
+  ec = object_error::success;
 }
 
 ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
   DataRefImpl ret;
-  memset(&ret, 0, sizeof(DataRefImpl));
+  std::memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(SymbolTable);
   return symbol_iterator(SymbolRef(ret, this));
 }
@@ -320,21 +374,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
 ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
   // The symbol table ends where the string table begins.
   DataRefImpl ret;
-  memset(&ret, 0, sizeof(DataRefImpl));
+  std::memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(StringTable);
   return symbol_iterator(SymbolRef(ret, this));
 }
 
 ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
   DataRefImpl ret;
-  memset(&ret, 0, sizeof(DataRefImpl));
+  std::memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(SectionTable);
   return section_iterator(SectionRef(ret, this));
 }
 
 ObjectFile::section_iterator COFFObjectFile::end_sections() const {
   DataRefImpl ret;
-  memset(&ret, 0, sizeof(DataRefImpl));
+  std::memset(&ret, 0, sizeof(DataRefImpl));
   ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
   return section_iterator(SectionRef(ret, this));
 }
@@ -365,24 +419,37 @@ unsigned COFFObjectFile::getArch() const {
   }
 }
 
-const coff_section *COFFObjectFile::getSection(std::size_t index) const {
-  if (index > 0 && index <= Header->NumberOfSections)
-    return SectionTable + (index - 1);
-  return 0;
+error_code COFFObjectFile::getSection(int32_t index,
+                                      const coff_section *&Result) const {
+  // Check for special index values.
+  if (index == COFF::IMAGE_SYM_UNDEFINED ||
+      index == COFF::IMAGE_SYM_ABSOLUTE ||
+      index == COFF::IMAGE_SYM_DEBUG)
+    Result = NULL;
+  else if (index > 0 && index <= Header->NumberOfSections)
+    // We already verified the section table data, so no need to check again.
+    Result = SectionTable + (index - 1);
+  else
+    return object_error::parse_failed;
+  return object_error::success;
 }
 
-const char *COFFObjectFile::getString(std::size_t offset) const {
-  const ulittle32_t *StringTableSize =
-    reinterpret_cast<const ulittle32_t *>(StringTable);
-  if (offset < *StringTableSize)
-    return StringTable + offset;
-  return 0;
+error_code COFFObjectFile::getString(uint32_t offset,
+                                     StringRef &Result) const {
+  if (StringTableSize <= 4)
+    // Tried to get a string from an empty string table.
+    return object_error::parse_failed;
+  if (offset >= StringTableSize)
+    return object_error::unexpected_eof;
+  Result = StringRef(StringTable + offset);
+  return object_error::success;
 }
 
 namespace llvm {
 
   ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
-    return new COFFObjectFile(Object);
+    error_code ec;
+    return new COFFObjectFile(Object, ec);
   }
 
 } // end namespace llvm
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index d2a2726ce739..e2ff4dfc0384 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -222,22 +222,24 @@ class ELFObjectFile : public ObjectFile {
   const char     *getString(const Elf_Shdr *section, uint32_t offset) const;
 
 protected:
-  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
-  virtual StringRef getSymbolName(DataRefImpl Symb) const;
-  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const;
-  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const;
-  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const;
-  virtual bool      isSymbolInternal(DataRefImpl Symb) const;
-
-  virtual SectionRef getSectionNext(DataRefImpl Sec) const;
-  virtual StringRef  getSectionName(DataRefImpl Sec) const;
-  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const;
-  virtual uint64_t   getSectionSize(DataRefImpl Sec) const;
-  virtual StringRef  getSectionContents(DataRefImpl Sec) const;
-  virtual bool       isSectionText(DataRefImpl Sec) const;
+  virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+  virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+  virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+  virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+  virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+  virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+  virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+  virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+  virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+  virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+  virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+                                           bool &Result) const;
 
 public:
-  ELFObjectFile(MemoryBuffer *Object);
+  ELFObjectFile(MemoryBuffer *Object, error_code &ec);
   virtual symbol_iterator begin_symbols() const;
   virtual symbol_iterator end_symbols() const;
   virtual section_iterator begin_sections() const;
@@ -259,9 +261,9 @@ void ELFObjectFile<target_endianness, is64Bits>
   //        an error object around.
   if (!(  symb
         && SymbolTableSection
-        && symb >= (const Elf_Sym*)(base
+        && symb >= (const Elf_Sym*)(base()
                    + SymbolTableSection->sh_offset)
-        && symb <  (const Elf_Sym*)(base
+        && symb <  (const Elf_Sym*)(base()
                    + SymbolTableSection->sh_offset
                    + SymbolTableSection->sh_size)))
     // FIXME: Proper error handling.
@@ -269,8 +271,9 @@ void ELFObjectFile<target_endianness, is64Bits>
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-SymbolRef ELFObjectFile<target_endianness, is64Bits>
-                       ::getSymbolNext(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSymbolNext(DataRefImpl Symb,
+                                        SymbolRef &Result) const {
   validateSymbol(Symb);
   const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
 
@@ -287,63 +290,80 @@ SymbolRef ELFObjectFile<target_endianness, is64Bits>
     }
   }
 
-  return SymbolRef(Symb, this);
+  Result = SymbolRef(Symb, this);
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
-                       ::getSymbolName(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSymbolName(DataRefImpl Symb,
+                                        StringRef &Result) const {
   validateSymbol(Symb);
   const Elf_Sym  *symb = getSymbol(Symb);
   if (symb->st_name == 0) {
     const Elf_Shdr *section = getSection(symb->st_shndx);
     if (!section)
-      return "";
-    return getString(dot_shstrtab_sec, section->sh_name);
+      Result = "";
+    else
+      Result = getString(dot_shstrtab_sec, section->sh_name);
+    return object_error::success;
   }
 
   // Use the default symbol table name section.
-  return getString(dot_strtab_sec, symb->st_name);
+  Result = getString(dot_strtab_sec, symb->st_name);
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
-                      ::getSymbolAddress(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSymbolAddress(DataRefImpl Symb,
+                                           uint64_t &Result) const {
   validateSymbol(Symb);
   const Elf_Sym  *symb = getSymbol(Symb);
   const Elf_Shdr *Section;
   switch (symb->st_shndx) {
   case ELF::SHN_COMMON:
    // Undefined symbols have no address yet.
-  case ELF::SHN_UNDEF: return UnknownAddressOrSize;
-  case ELF::SHN_ABS: return symb->st_value;
+  case ELF::SHN_UNDEF:
+    Result = UnknownAddressOrSize;
+    return object_error::success;
+  case ELF::SHN_ABS:
+    Result = symb->st_value;
+    return object_error::success;
   default: Section = getSection(symb->st_shndx);
   }
 
   switch (symb->getType()) {
-  case ELF::STT_SECTION: return Section ? Section->sh_addr
-                                        : UnknownAddressOrSize;
+  case ELF::STT_SECTION:
+    Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+    return object_error::success;
   case ELF::STT_FUNC:
   case ELF::STT_OBJECT:
   case ELF::STT_NOTYPE:
-    return symb->st_value;
-  default: return UnknownAddressOrSize;
+    Result = symb->st_value;
+    return object_error::success;
+  default:
+    Result = UnknownAddressOrSize;
+    return object_error::success;
   }
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
-                      ::getSymbolSize(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+    ::getSymbolSize(DataRefImpl Symb, uint64_t &Result) const {
   validateSymbol(Symb);
   const Elf_Sym  *symb = getSymbol(Symb);
   if (symb->st_size == 0)
-    return UnknownAddressOrSize;
-  return symb->st_size;
+    Result = UnknownAddressOrSize;
+  else
+    Result = symb->st_size;
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-char ELFObjectFile<target_endianness, is64Bits>
-                  ::getSymbolNMTypeChar(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSymbolNMTypeChar(DataRefImpl Symb,
+                                              char &Result) const {
   validateSymbol(Symb);
   const Elf_Sym  *symb = getSymbol(Symb);
   const Elf_Shdr *Section = getSection(symb->st_shndx);
@@ -390,89 +410,120 @@ char ELFObjectFile<target_endianness, is64Bits>
         ret = 'W';
   }
 
-  if (ret == '?' && symb->getType() == ELF::STT_SECTION)
-    return StringSwitch<char>(getSymbolName(Symb))
+  if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
+    StringRef name;
+    if (error_code ec = getSymbolName(Symb, name))
+      return ec;
+    Result = StringSwitch<char>(name)
       .StartsWith(".debug", 'N')
       .StartsWith(".note", 'n');
+    return object_error::success;
+  }
 
-  return ret;
+  Result = ret;
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-bool ELFObjectFile<target_endianness, is64Bits>
-                  ::isSymbolInternal(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+    ::isSymbolInternal(DataRefImpl Symb, bool &Result) const {
   validateSymbol(Symb);
   const Elf_Sym  *symb = getSymbol(Symb);
 
   if (  symb->getType() == ELF::STT_FILE
      || symb->getType() == ELF::STT_SECTION)
-    return true;
-  return false;
+    Result = true;
+  else
+    Result = false;
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-SectionRef ELFObjectFile<target_endianness, is64Bits>
-                        ::getSectionNext(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
   const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
   sec += Header->e_shentsize;
   Sec.p = reinterpret_cast<intptr_t>(sec);
-  return SectionRef(Sec, this);
+  Result = SectionRef(Sec, this);
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
-                       ::getSectionName(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSectionName(DataRefImpl Sec,
+                                         StringRef &Result) const {
   const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
-  return StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+  Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
-                      ::getSectionAddress(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSectionAddress(DataRefImpl Sec,
+                                            uint64_t &Result) const {
   const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
-  return sec->sh_addr;
+  Result = sec->sh_addr;
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
-                      ::getSectionSize(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSectionSize(DataRefImpl Sec,
+                                         uint64_t &Result) const {
   const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
-  return sec->sh_size;
+  Result = sec->sh_size;
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
-                       ::getSectionContents(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSectionContents(DataRefImpl Sec,
+                                             StringRef &Result) const {
   const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
-  const char *start = (char*)base + sec->sh_offset;
-  return StringRef(start, sec->sh_size);
+  const char *start = (const char*)base() + sec->sh_offset;
+  Result = StringRef(start, sec->sh_size);
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-bool ELFObjectFile<target_endianness, is64Bits>
-                  ::isSectionText(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::isSectionText(DataRefImpl Sec,
+                                        bool &Result) const {
   const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
   if (sec->sh_flags & ELF::SHF_EXECINSTR)
-    return true;
-  return false;
+    Result = true;
+  else
+    Result = false;
+  return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+                          ::sectionContainsSymbol(DataRefImpl Sec,
+                                                  DataRefImpl Symb,
+                                                  bool &Result) const {
+  // FIXME: Unimplemented.
+  Result = false;
+  return object_error::success;
 }
 
 template<support::endianness target_endianness, bool is64Bits>
-ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
-  : ObjectFile(Object)
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
+                                                          , error_code &ec)
+  : ObjectFile(Binary::isELF, Object, ec)
   , SectionHeaderTable(0)
   , dot_shstrtab_sec(0)
   , dot_strtab_sec(0) {
-  Header = reinterpret_cast<const Elf_Ehdr *>(base);
+  Header = reinterpret_cast<const Elf_Ehdr *>(base());
 
   if (Header->e_shoff == 0)
     return;
 
   SectionHeaderTable =
-    reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff);
+    reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
   uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
   if (!(  (const uint8_t *)SectionHeaderTable + SectionTableSize
-         <= base + MapFile->getBufferSize()))
+         <= base() + Data->getBufferSize()))
     // FIXME: Proper error handling.
     report_fatal_error("Section table goes past end of file!");
 
@@ -491,7 +542,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
   dot_shstrtab_sec = getSection(Header->e_shstrndx);
   if (dot_shstrtab_sec) {
     // Verify that the last byte in the string table in a null.
-    if (((const char*)base + dot_shstrtab_sec->sh_offset)
+    if (((const char*)base() + dot_shstrtab_sec->sh_offset)
         [dot_shstrtab_sec->sh_size - 1] != 0)
       // FIXME: Proper error handling.
       report_fatal_error("String table must end with a null terminator!");
@@ -509,7 +560,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
           // FIXME: Proper error handling.
           report_fatal_error("Already found section named .strtab!");
         dot_strtab_sec = sh;
-        const char *dot_strtab = (const char*)base + sh->sh_offset;
+        const char *dot_strtab = (const char*)base() + sh->sh_offset;
           if (dot_strtab[sh->sh_size - 1] != 0)
             // FIXME: Proper error handling.
             report_fatal_error("String table must end with a null terminator!");
@@ -548,7 +599,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
                                           ::begin_sections() const {
   DataRefImpl ret;
   memset(&ret, 0, sizeof(DataRefImpl));
-  ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff);
+  ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
   return section_iterator(SectionRef(ret, this));
 }
 
@@ -557,7 +608,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
                                           ::end_sections() const {
   DataRefImpl ret;
   memset(&ret, 0, sizeof(DataRefImpl));
-  ret.p = reinterpret_cast<intptr_t>(base
+  ret.p = reinterpret_cast<intptr_t>(base()
                                      + Header->e_shoff
                                      + (Header->e_shentsize * Header->e_shnum));
   return section_iterator(SectionRef(ret, this));
@@ -613,7 +664,7 @@ const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
 ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
   const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
   return reinterpret_cast<const Elf_Sym *>(
-           base
+           base()
            + sec->sh_offset
            + (Symb.d.a * sec->sh_entsize));
 }
@@ -656,8 +707,8 @@ const char *ELFObjectFile<target_endianness, is64Bits>
   assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
   if (offset >= section->sh_size)
     // FIXME: Proper error handling.
-    report_fatal_error("Sybol name offset outside of string table!");
-  return (const char *)base + section->sh_offset + offset;
+    report_fatal_error("Symbol name offset outside of string table!");
+  return (const char *)base() + section->sh_offset + offset;
 }
 
 // EI_CLASS, EI_DATA.
@@ -673,14 +724,15 @@ namespace llvm {
 
   ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
     std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+    error_code ec;
     if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
-      return new ELFObjectFile<support::little, false>(Object);
+      return new ELFObjectFile<support::little, false>(Object, ec);
     else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
-      return new ELFObjectFile<support::big, false>(Object);
+      return new ELFObjectFile<support::big, false>(Object, ec);
     else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
-      return new ELFObjectFile<support::little, true>(Object);
+      return new ELFObjectFile<support::little, true>(Object, ec);
     else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
-      return new ELFObjectFile<support::big, true>(Object);
+      return new ELFObjectFile<support::big, true>(Object, ec);
     // FIXME: Proper error handling.
     report_fatal_error("Not an ELF object file!");
   }
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
new file mode 100644
index 000000000000..25946257ab5a
--- /dev/null
+++ b/lib/Object/Error.cpp
@@ -0,0 +1,57 @@
+//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the Object library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class _object_error_category : public _do_message {
+public:
+  virtual const char* name() const;
+  virtual std::string message(int ev) const;
+  virtual error_condition default_error_condition(int ev) const;
+};
+}
+
+const char *_object_error_category::name() const {
+  return "llvm.object";
+}
+
+std::string _object_error_category::message(int ev) const {
+  switch (ev) {
+  case object_error::success: return "Success";
+  case object_error::invalid_file_type:
+    return "The file was not recognized as a valid object file";
+  case object_error::parse_failed:
+    return "Invalid data was encountered while parsing the file";
+  case object_error::unexpected_eof:
+    return "The end of the file was unexpectedly encountered";
+  default:
+    llvm_unreachable("An enumerator of object_error does not have a message "
+                     "defined.");
+  }
+}
+
+error_condition _object_error_category::default_error_condition(int ev) const {
+  if (ev == object_error::success)
+    return errc::success;
+  return errc::invalid_argument;
+}
+
+const error_category &object::object_category() {
+  static _object_error_category o;
+  return o;
+}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 877cbfbdb808..26a6e136d753 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -32,8 +32,8 @@ typedef MachOObject::LoadCommandInfo LoadCommandInfo;
 
 class MachOObjectFile : public ObjectFile {
 public:
-  MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO)
-    : ObjectFile(Object),
+  MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec)
+    : ObjectFile(Binary::isMachO, Object, ec),
       MachOObj(MOO),
       RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {}
 
@@ -47,19 +47,21 @@ public:
   virtual unsigned getArch() const;
 
 protected:
-  virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
-  virtual StringRef getSymbolName(DataRefImpl Symb) const;
-  virtual uint64_t  getSymbolAddress(DataRefImpl Symb) const;
-  virtual uint64_t  getSymbolSize(DataRefImpl Symb) const;
-  virtual char      getSymbolNMTypeChar(DataRefImpl Symb) const;
-  virtual bool      isSymbolInternal(DataRefImpl Symb) const;
-
-  virtual SectionRef getSectionNext(DataRefImpl Sec) const;
-  virtual StringRef  getSectionName(DataRefImpl Sec) const;
-  virtual uint64_t   getSectionAddress(DataRefImpl Sec) const;
-  virtual uint64_t   getSectionSize(DataRefImpl Sec) const;
-  virtual StringRef  getSectionContents(DataRefImpl Sec) const;
-  virtual bool       isSectionText(DataRefImpl Sec) const;
+  virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+  virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+  virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+  virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+  virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+  virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+  virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+  virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+  virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+  virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+  virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
+                                           bool &Result) const;
 
 private:
   MachOObject *MachOObj;
@@ -68,16 +70,21 @@ private:
   void moveToNextSection(DataRefImpl &DRI) const;
   void getSymbolTableEntry(DataRefImpl DRI,
                            InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+  void getSymbol64TableEntry(DataRefImpl DRI,
+                          InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
   void moveToNextSymbol(DataRefImpl &DRI) const;
   void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
+  void getSection64(DataRefImpl DRI,
+                    InMemoryStruct<macho::Section64> &Res) const;
 };
 
 ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+  error_code ec;
   std::string Err;
   MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
   if (!MachOObj)
     return NULL;
-  return new MachOObjectFile(Buffer, MachOObj);
+  return new MachOObjectFile(Buffer, MachOObj, ec);
 }
 
 /*===-- Symbols -----------------------------------------------------------===*/
@@ -113,35 +120,81 @@ void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
                                  Res);
 }
 
+void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
+    InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+  MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+  if (RegisteredStringTable != DRI.d.a) {
+    MachOObj->RegisterStringTable(*SymtabLoadCmd);
+    RegisteredStringTable = DRI.d.a;
+  }
+
+  MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+                                   Res);
+}
 
-SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const {
+
+error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
+                                          SymbolRef &Result) const {
   DRI.d.b++;
   moveToNextSymbol(DRI);
-  return SymbolRef(DRI, this);
+  Result = SymbolRef(DRI, this);
+  return object_error::success;
 }
 
-StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const {
-  InMemoryStruct<macho::SymbolTableEntry> Entry;
-  getSymbolTableEntry(DRI, Entry);
-  return MachOObj->getStringAtIndex(Entry->StringIndex);
+error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
+                                          StringRef &Result) const {
+  if (MachOObj->is64Bit()) {
+    InMemoryStruct<macho::Symbol64TableEntry> Entry;
+    getSymbol64TableEntry(DRI, Entry);
+    Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+  } else {
+    InMemoryStruct<macho::SymbolTableEntry> Entry;
+    getSymbolTableEntry(DRI, Entry);
+    Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+  }
+  return object_error::success;
 }
 
-uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const {
-  InMemoryStruct<macho::SymbolTableEntry> Entry;
-  getSymbolTableEntry(DRI, Entry);
-  return Entry->Value;
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
+                                             uint64_t &Result) const {
+  if (MachOObj->is64Bit()) {
+    InMemoryStruct<macho::Symbol64TableEntry> Entry;
+    getSymbol64TableEntry(DRI, Entry);
+    Result = Entry->Value;
+  } else {
+    InMemoryStruct<macho::SymbolTableEntry> Entry;
+    getSymbolTableEntry(DRI, Entry);
+    Result = Entry->Value;
+  }
+  return object_error::success;
 }
 
-uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const {
-  return UnknownAddressOrSize;
+error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
+                                          uint64_t &Result) const {
+  Result = UnknownAddressOrSize;
+  return object_error::success;
 }
 
-char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const {
-  InMemoryStruct<macho::SymbolTableEntry> Entry;
-  getSymbolTableEntry(DRI, Entry);
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
+                                                char &Result) const {
+  uint8_t Type, Flags;
+  if (MachOObj->is64Bit()) {
+    InMemoryStruct<macho::Symbol64TableEntry> Entry;
+    getSymbol64TableEntry(DRI, Entry);
+    Type = Entry->Type;
+    Flags = Entry->Flags;
+  } else {
+    InMemoryStruct<macho::SymbolTableEntry> Entry;
+    getSymbolTableEntry(DRI, Entry);
+    Type = Entry->Type;
+    Flags = Entry->Flags;
+  }
 
   char Char;
-  switch (Entry->Type & macho::STF_TypeMask) {
+  switch (Type & macho::STF_TypeMask) {
     case macho::STT_Undefined:
       Char = 'u';
       break;
@@ -154,15 +207,24 @@ char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const {
       break;
   }
 
-  if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern))
+  if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
     Char = toupper(Char);
-  return Char;
+  Result = Char;
+  return object_error::success;
 }
 
-bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const {
-  InMemoryStruct<macho::SymbolTableEntry> Entry;
-  getSymbolTableEntry(DRI, Entry);
-  return Entry->Flags & macho::STF_StabsEntryMask;
+error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
+                                             bool &Result) const {
+  if (MachOObj->is64Bit()) {
+    InMemoryStruct<macho::Symbol64TableEntry> Entry;
+    getSymbol64TableEntry(DRI, Entry);
+    Result = Entry->Flags & macho::STF_StabsEntryMask;
+  } else {
+    InMemoryStruct<macho::SymbolTableEntry> Entry;
+    getSymbolTableEntry(DRI, Entry);
+    Result = Entry->Flags & macho::STF_StabsEntryMask;
+  }
+  return object_error::success;
 }
 
 ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
@@ -204,10 +266,12 @@ void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
   }
 }
 
-SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const {
+error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
+                                           SectionRef &Result) const {
   DRI.d.b++;
   moveToNextSection(DRI);
-  return SectionRef(DRI, this);
+  Result = SectionRef(DRI, this);
+  return object_error::success;
 }
 
 void
@@ -219,43 +283,121 @@ MachOObjectFile::getSection(DataRefImpl DRI,
   MachOObj->ReadSection(LCI, DRI.d.b, Res);
 }
 
-StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const {
-  InMemoryStruct<macho::SegmentLoadCommand> SLC;
+void
+MachOObjectFile::getSection64(DataRefImpl DRI,
+                            InMemoryStruct<macho::Section64> &Res) const {
+  InMemoryStruct<macho::Segment64LoadCommand> SLC;
   LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSegmentLoadCommand(LCI, SLC);
-  InMemoryStruct<macho::Section> Sect;
-  MachOObj->ReadSection(LCI, DRI.d.b, Sect);
-
-  static char Result[34];
-  strcpy(Result, SLC->Name);
-  strcat(Result, ",");
-  strcat(Result, Sect->Name);
-  return StringRef(Result);
+  MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+  MachOObj->ReadSection64(LCI, DRI.d.b, Res);
 }
 
-uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const {
-  InMemoryStruct<macho::Section> Sect;
-  getSection(DRI, Sect);
-  return Sect->Address;
+static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
+  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+  if (LCI.Command.Type == macho::LCT_Segment64)
+    return true;
+  assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
+  return false;
 }
 
-uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const {
-  InMemoryStruct<macho::Section> Sect;
-  getSection(DRI, Sect);
-  return Sect->Size;
+error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
+                                           StringRef &Result) const {
+  // FIXME: thread safety.
+  static char result[34];
+  if (is64BitLoadCommand(MachOObj, DRI)) {
+    InMemoryStruct<macho::Segment64LoadCommand> SLC;
+    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+    MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+    InMemoryStruct<macho::Section64> Sect;
+    MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
+
+    strcpy(result, Sect->SegmentName);
+    strcat(result, ",");
+    strcat(result, Sect->Name);
+  } else {
+    InMemoryStruct<macho::SegmentLoadCommand> SLC;
+    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+    MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+    InMemoryStruct<macho::Section> Sect;
+    MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+
+    strcpy(result, Sect->SegmentName);
+    strcat(result, ",");
+    strcat(result, Sect->Name);
+  }
+  Result = StringRef(result);
+  return object_error::success;
 }
 
-StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const {
-  InMemoryStruct<macho::Section> Sect;
-  getSection(DRI, Sect);
-  return MachOObj->getData(Sect->Offset, Sect->Size);
+error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
+                                              uint64_t &Result) const {
+  if (is64BitLoadCommand(MachOObj, DRI)) {
+    InMemoryStruct<macho::Section64> Sect;
+    getSection64(DRI, Sect);
+    Result = Sect->Address;
+  } else {
+    InMemoryStruct<macho::Section> Sect;
+    getSection(DRI, Sect);
+    Result = Sect->Address;
+  }
+  return object_error::success;
 }
 
-bool MachOObjectFile::isSectionText(DataRefImpl DRI) const {
-  InMemoryStruct<macho::SegmentLoadCommand> SLC;
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSegmentLoadCommand(LCI, SLC);
-  return !strcmp(SLC->Name, "__TEXT");
+error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
+                                           uint64_t &Result) const {
+  if (is64BitLoadCommand(MachOObj, DRI)) {
+    InMemoryStruct<macho::Section64> Sect;
+    getSection64(DRI, Sect);
+    Result = Sect->Size;
+  } else {
+    InMemoryStruct<macho::Section> Sect;
+    getSection(DRI, Sect);
+    Result = Sect->Size;
+  }
+  return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
+                                               StringRef &Result) const {
+  if (is64BitLoadCommand(MachOObj, DRI)) {
+    InMemoryStruct<macho::Section64> Sect;
+    getSection64(DRI, Sect);
+    Result = MachOObj->getData(Sect->Offset, Sect->Size);
+  } else {
+    InMemoryStruct<macho::Section> Sect;
+    getSection(DRI, Sect);
+    Result = MachOObj->getData(Sect->Offset, Sect->Size);
+  }
+  return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
+                                          bool &Result) const {
+  if (is64BitLoadCommand(MachOObj, DRI)) {
+    InMemoryStruct<macho::Section64> Sect;
+    getSection64(DRI, Sect);
+    Result = !strcmp(Sect->Name, "__text");
+  } else {
+    InMemoryStruct<macho::Section> Sect;
+    getSection(DRI, Sect);
+    Result = !strcmp(Sect->Name, "__text");
+  }
+  return object_error::success;
+}
+
+error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+                                                  DataRefImpl Symb,
+                                                  bool &Result) const {
+  if (MachOObj->is64Bit()) {
+    InMemoryStruct<macho::Symbol64TableEntry> Entry;
+    getSymbol64TableEntry(Symb, Entry);
+    Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+  } else {
+    InMemoryStruct<macho::SymbolTableEntry> Entry;
+    getSymbolTableEntry(Symb, Entry);
+    Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+  }
+  return object_error::success;
 }
 
 ObjectFile::section_iterator MachOObjectFile::begin_sections() const {
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 603b23c74e93..9a373ad21bd2 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -41,19 +41,28 @@ LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
 }
 
 void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
-  // We can't use unwrap() here because the argument to ++ must be an lvalue.
-  ++*reinterpret_cast<ObjectFile::section_iterator*>(SI);
+  error_code ec;
+  unwrap(SI)->increment(ec);
+  if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
 }
 
 const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
-  return (*unwrap(SI))->getName().data();
+  StringRef ret;
+  if (error_code ec = (*unwrap(SI))->getName(ret))
+   report_fatal_error(ec.message());
+  return ret.data();
 }
 
 uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
-  return (*unwrap(SI))->getSize();
+  uint64_t ret;
+  if (error_code ec = (*unwrap(SI))->getSize(ret))
+    report_fatal_error(ec.message());
+  return ret;
 }
 
 const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
-  return (*unwrap(SI))->getContents().data();
+  StringRef ret;
+  if (error_code ec = (*unwrap(SI))->getContents(ret))
+    report_fatal_error(ec.message());
+  return ret.data();
 }
-
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 47b63115a94c..a7798df33fe5 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -21,18 +21,8 @@
 using namespace llvm;
 using namespace object;
 
-ObjectFile::ObjectFile(MemoryBuffer *Object)
-  : MapFile(Object) {
-  assert(MapFile && "Must be a valid MemoryBuffer!");
-  base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart());
-}
-
-ObjectFile::~ObjectFile() {
-  delete MapFile;
-}
-
-StringRef ObjectFile::getFilename() const {
-  return MapFile->getBufferIdentifier();
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+  : Binary(Type, source) {
 }
 
 ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index c3169acabbc7..c64da6e137ea 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -2084,6 +2085,23 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width,
   return fs;
 }
 
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+   an APSInt, whose initial bit-width and signed-ness are used to determine the
+   precision of the conversion.
+ */
+APFloat::opStatus
+APFloat::convertToInteger(APSInt &result,
+                          roundingMode rounding_mode, bool *isExact) const
+{
+  unsigned bitWidth = result.getBitWidth();
+  SmallVector<uint64_t, 4> parts(result.getNumWords());
+  opStatus status = convertToInteger(
+    parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
+  // Keeps the original signed-ness.
+  result = APInt(bitWidth, (unsigned)parts.size(), parts.data());
+  return status;
+}
+
 /* Convert an unsigned integer SRC to a floating point number,
    rounding according to ROUNDING_MODE.  The sign of the floating
    point number is not modified.  */
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 74d61c13a5c9..76265d445f45 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2164,12 +2164,33 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
 }
 
 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
-                     bool Signed) const {
+                     bool Signed, bool formatAsCLiteral) const {
   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
          "Radix should be 2, 8, 10, or 16!");
 
+  const char *Prefix = "";
+  if (formatAsCLiteral) {
+    switch (Radix) {
+      case 2:
+        // Binary literals are a non-standard extension added in gcc 4.3:
+        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+        Prefix = "0b";
+        break;
+      case 8:
+        Prefix = "0";
+        break;
+      case 16:
+        Prefix = "0x";
+        break;
+    }
+  }
+
   // First, check for a zero value and just short circuit the logic below.
   if (*this == 0) {
+    while (*Prefix) {
+      Str.push_back(*Prefix);
+      ++Prefix;
+    };
     Str.push_back('0');
     return;
   }
@@ -2193,6 +2214,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
       }
     }
 
+    while (*Prefix) {
+      Str.push_back(*Prefix);
+      ++Prefix;
+    };
+
     while (N) {
       *--BufPtr = Digits[N % Radix];
       N /= Radix;
@@ -2212,6 +2238,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
     Str.push_back('-');
   }
 
+  while (*Prefix) {
+    Str.push_back(*Prefix);
+    ++Prefix;
+  };
+
   // We insert the digits backward, then reverse them to get the right order.
   unsigned StartDig = Str.size();
 
@@ -2251,7 +2282,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
 /// to the methods above.
 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
   SmallString<40> S;
-  toString(S, Radix, Signed);
+  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
   return S.str();
 }
 
@@ -2266,7 +2297,7 @@ void APInt::dump() const {
 
 void APInt::print(raw_ostream &OS, bool isSigned) const {
   SmallString<40> S;
-  this->toString(S, 10, isSigned);
+  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
   OS << S.str();
 }
 
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 7f1c0d320b11..29143377628d 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -911,8 +911,8 @@ size_t alias::getOptionWidth() const {
 // Print out the option for the alias.
 void alias::printOptionInfo(size_t GlobalWidth) const {
   size_t L = std::strlen(ArgStr);
-  errs() << "  -" << ArgStr;
-  errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
+  outs() << "  -" << ArgStr;
+  outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 493f7083dbb3..81382d08dc23 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -529,8 +529,8 @@ ConstantRange::sub(const ConstantRange &Other) const {
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
   APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
-  APInt NewLower = getLower() - Other.getLower();
-  APInt NewUpper = getUpper() - Other.getUpper() + 1;
+  APInt NewLower = getLower() - Other.getUpper() + 1;
+  APInt NewUpper = getUpper() - Other.getLower();
   if (NewLower == NewUpper)
     return ConstantRange(getBitWidth(), /*isFullSet=*/true);
 
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 4299aa4e931d..c525a1228129 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -214,7 +214,12 @@ std::string sys::getHostCPUName() {
                // As found in a Summer 2010 model iMac.
       case 37: // Intel Core i7, laptop version.
         return "corei7";
-      case 42: // SandyBridge
+
+      // SandyBridge:
+      case 42: // Intel Core i7 processor. All processors are manufactured
+               // using the 32 nm process.
+      case 44: // Intel Core i7 processor and Intel Xeon processor. All
+               // processors are manufactured using the 32 nm process.
       case 45:
         return "corei7-avx";
 
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index dbdb303a4fdd..7e094ee78f36 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -113,6 +113,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case Win32: return "win32";
   case Haiku: return "haiku";
   case Minix: return "minix";
+  case RTEMS: return "rtems";
   }
 
   return "<invalid>";
@@ -281,7 +282,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
     return cellspu;
   else if (ArchName == "msp430")
     return msp430;
-  else if (ArchName == "mips" || ArchName == "mipsallegrex")
+  else if (ArchName == "mips" || ArchName == "mipseb" ||
+           ArchName == "mipsallegrex")
     return mips;
   else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
            ArchName == "psp")
@@ -350,6 +352,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
     return Haiku;
   else if (OSName.startswith("minix"))
     return Minix;
+  else if (OSName.startswith("rtems"))
+    return RTEMS;
   else
     return UnknownOS;
 }
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 75cea2961a9d..d62123cc985e 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -14,6 +14,11 @@
 using namespace llvm;
 
 std::string Twine::str() const {
+  // If we're storing only a std::string, just return it.
+  if (LHSKind == StdStringKind && RHSKind == EmptyKind)
+    return *static_cast<const std::string*>(LHS);
+
+  // Otherwise, flatten and copy the contents first.
   SmallString<256> Vec;
   return toStringRef(Vec).str();
 }
@@ -37,9 +42,9 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
       // Already null terminated, yay!
       return StringRef(static_cast<const char*>(LHS));
     case StdStringKind: {
-        const std::string *str = static_cast<const std::string*>(LHS);
-        return StringRef(str->c_str(), str->size());
-      }
+      const std::string *str = static_cast<const std::string*>(LHS);
+      return StringRef(str->c_str(), str->size());
+    }
     default:
       break;
     }
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 430cf2ed8e8f..f295b92e4a5b 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -842,6 +842,9 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
 
   // Save the name
   path = FNBuffer;
+
+  // By default mkstemp sets the mode to 0600, so update mode bits now.
+  AddPermissionBits (*this, 0666);
 #elif defined(HAVE_MKTEMP)
   // If we don't have mkstemp, use the old and obsolete mktemp function.
   if (mktemp(FNBuffer) == 0)
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 4227844ae506..fc5f5809cb40 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -115,7 +115,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
        E = OpenedHandles.end(); I != E; ++I) {
     FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
     if (ptr) {
-      return (void *) ptr;
+      return (void *)(intptr_t)ptr;
     }
   }
 
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
index 84862d69e2b5..379645d2ff60 100644
--- a/lib/Support/Windows/explicit_symbols.inc
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -2,7 +2,7 @@
 
 #ifdef HAVE__ALLOCA
   EXPLICIT_SYMBOL(_alloca)
-  EXPLICIT_SYMBOL2(alloca, _alloca);
+  EXPLICIT_SYMBOL2(alloca, _alloca)
 #endif
 #ifdef HAVE___ALLOCA
   EXPLICIT_SYMBOL(__alloca)
@@ -62,5 +62,5 @@
 
 /* msvcrt */
 #if defined(_MSC_VER)
-  EXPLICIT_SYMBOL2(alloca, _alloca_probe);
+  EXPLICIT_SYMBOL2(alloca, _alloca_probe)
 #endif
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 4679f7443bfc..08dc340f8541 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,24 +16,29 @@
 #define TARGET_ARM_H
 
 #include "ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
 
 namespace llvm {
 
+class ARMAsmPrinter;
 class ARMBaseTargetMachine;
 class FunctionPass;
 class JITCodeEmitter;
-class formatted_raw_ostream;
-class MCCodeEmitter;
-class TargetAsmBackend;
 class MachineInstr;
-class ARMAsmPrinter;
+class MCCodeEmitter;
 class MCInst;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class TargetAsmBackend;
+class formatted_raw_ostream;
 
-MCCodeEmitter *createARMMCCodeEmitter(const Target &,
-                                      TargetMachine &TM,
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+                                      const MCSubtargetInfo &STI,
                                       MCContext &Ctx);
 
 TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
@@ -53,11 +58,15 @@ FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createThumb2SizeReductionPass();
 
-extern Target TheARMTarget, TheThumbTarget;
-
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+                                          bool Is64Bit,
+                                          uint32_t CPUType,
+                                          uint32_t CPUSubtype);
+
 } // end namespace llvm;
 
 #endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 6af5f85e8a85..cf333ccd49ba 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -16,18 +16,26 @@
 
 include "llvm/Target/Target.td"
 
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb  : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+                                  "Thumb mode">;
 
 //===----------------------------------------------------------------------===//
 // ARM Subtarget features.
 //
 
-def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
                                    "Enable VFP2 instructions">;
-def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
-                                   "Enable VFP3 instructions">;
-def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
-                                   "Enable NEON instructions">;
-def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+                                   "Enable VFP3 instructions",
+                                   [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+                                   "Enable NEON instructions",
+                                   [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
                                      "Enable Thumb2 instructions">;
 def FeatureNoARM  : SubtargetFeature<"noarm", "NoARM", "true",
                                      "Does not support ARM mode execution">;
@@ -75,32 +83,32 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                "AvoidCPSRPartialUpdate", "true",
                                  "Avoid CPSR partial update for OOO execution">;
 
+/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
+def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
+                                 "Supports v7 DSP instructions in Thumb2.">;
+
 // Multiprocessing extension.
 def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
                                  "Supports Multiprocessing extension">;
 
-// ARM architectures.
-def ArchV4T     : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
-                                   "ARM v4T">;
-def ArchV5T     : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
-                                   "ARM v5T">;
-def ArchV5TE    : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
-                                   "ARM v5TE, v5TEj, v5TExp">;
-def ArchV6      : SubtargetFeature<"v6", "ARMArchVersion", "V6",
-                                   "ARM v6">;
-def ArchV6M     : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
-                                   "ARM v6m",
-                                   [FeatureNoARM, FeatureDB]>;
-def ArchV6T2    : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
-                                   "ARM v6t2",
-                                   [FeatureThumb2]>;
-def ArchV7A     : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
-                                   "ARM v7A",
-                                   [FeatureThumb2, FeatureNEON, FeatureDB]>;
-def ArchV7M     : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
-                                   "ARM v7M",
-                                   [FeatureThumb2, FeatureNoARM, FeatureDB,
-                                    FeatureHWDiv]>;
+// ARM ISAs.
+def HasV4TOps   : SubtargetFeature<"v4t", "HasV4TOps", "true",
+                                   "Support ARM v4T instructions">;
+def HasV5TOps   : SubtargetFeature<"v5t", "HasV5TOps", "true",
+                                   "Support ARM v5T instructions",
+                                   [HasV4TOps]>;
+def HasV5TEOps  : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+                             "Support ARM v5TE, v5TEj, and v5TExp instructions",
+                                   [HasV5TOps]>;
+def HasV6Ops    : SubtargetFeature<"v6", "HasV6Ops", "true",
+                                   "Support ARM v6 instructions",
+                                   [HasV5TEOps]>;
+def HasV6T2Ops  : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+                                   "Support ARM v6t2 instructions",
+                                   [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>;
+def HasV7Ops    : SubtargetFeature<"v7", "HasV7Ops", "true",
+                                   "Support ARM v7 instructions",
+                                   [HasV6T2Ops]>;
 
 //===----------------------------------------------------------------------===//
 // ARM Processors supported.
@@ -109,8 +117,6 @@ def ArchV7M     : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
 include "ARMSchedule.td"
 
 // ARM processor families.
-def ProcOthers  : SubtargetFeature<"others", "ARMProcFamily", "Others",
-                                   "One of the other ARM processor families">;
 def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
                                    "Cortex-A8 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureNEONForFP,
@@ -135,64 +141,76 @@ def : ProcNoItin<"strongarm1100",   []>;
 def : ProcNoItin<"strongarm1110",   []>;
 
 // V4T Processors.
-def : ProcNoItin<"arm7tdmi",        [ArchV4T]>;
-def : ProcNoItin<"arm7tdmi-s",      [ArchV4T]>;
-def : ProcNoItin<"arm710t",         [ArchV4T]>;
-def : ProcNoItin<"arm720t",         [ArchV4T]>;
-def : ProcNoItin<"arm9",            [ArchV4T]>;
-def : ProcNoItin<"arm9tdmi",        [ArchV4T]>;
-def : ProcNoItin<"arm920",          [ArchV4T]>;
-def : ProcNoItin<"arm920t",         [ArchV4T]>;
-def : ProcNoItin<"arm922t",         [ArchV4T]>;
-def : ProcNoItin<"arm940t",         [ArchV4T]>;
-def : ProcNoItin<"ep9312",          [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi",        [HasV4TOps]>;
+def : ProcNoItin<"arm7tdmi-s",      [HasV4TOps]>;
+def : ProcNoItin<"arm710t",         [HasV4TOps]>;
+def : ProcNoItin<"arm720t",         [HasV4TOps]>;
+def : ProcNoItin<"arm9",            [HasV4TOps]>;
+def : ProcNoItin<"arm9tdmi",        [HasV4TOps]>;
+def : ProcNoItin<"arm920",          [HasV4TOps]>;
+def : ProcNoItin<"arm920t",         [HasV4TOps]>;
+def : ProcNoItin<"arm922t",         [HasV4TOps]>;
+def : ProcNoItin<"arm940t",         [HasV4TOps]>;
+def : ProcNoItin<"ep9312",          [HasV4TOps]>;
 
 // V5T Processors.
-def : ProcNoItin<"arm10tdmi",       [ArchV5T]>;
-def : ProcNoItin<"arm1020t",        [ArchV5T]>;
+def : ProcNoItin<"arm10tdmi",       [HasV5TOps]>;
+def : ProcNoItin<"arm1020t",        [HasV5TOps]>;
 
 // V5TE Processors.
-def : ProcNoItin<"arm9e",           [ArchV5TE]>;
-def : ProcNoItin<"arm926ej-s",      [ArchV5TE]>;
-def : ProcNoItin<"arm946e-s",       [ArchV5TE]>;
-def : ProcNoItin<"arm966e-s",       [ArchV5TE]>;
-def : ProcNoItin<"arm968e-s",       [ArchV5TE]>;
-def : ProcNoItin<"arm10e",          [ArchV5TE]>;
-def : ProcNoItin<"arm1020e",        [ArchV5TE]>;
-def : ProcNoItin<"arm1022e",        [ArchV5TE]>;
-def : ProcNoItin<"xscale",          [ArchV5TE]>;
-def : ProcNoItin<"iwmmxt",          [ArchV5TE]>;
+def : ProcNoItin<"arm9e",           [HasV5TEOps]>;
+def : ProcNoItin<"arm926ej-s",      [HasV5TEOps]>;
+def : ProcNoItin<"arm946e-s",       [HasV5TEOps]>;
+def : ProcNoItin<"arm966e-s",       [HasV5TEOps]>;
+def : ProcNoItin<"arm968e-s",       [HasV5TEOps]>;
+def : ProcNoItin<"arm10e",          [HasV5TEOps]>;
+def : ProcNoItin<"arm1020e",        [HasV5TEOps]>;
+def : ProcNoItin<"arm1022e",        [HasV5TEOps]>;
+def : ProcNoItin<"xscale",          [HasV5TEOps]>;
+def : ProcNoItin<"iwmmxt",          [HasV5TEOps]>;
 
 // V6 Processors.
-def : Processor<"arm1136j-s",       ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1136jf-s",      ARMV6Itineraries, [ArchV6, FeatureVFP2,
+def : Processor<"arm1136j-s",       ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1136jf-s",      ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
                                                        FeatureHasSlowFPVMLx]>;
-def : Processor<"arm1176jz-s",      ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [ArchV6, FeatureVFP2,
+def : Processor<"arm1176jz-s",      ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
                                                        FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp",      ARMV6Itineraries, [ArchV6]>;
-def : Processor<"mpcore",           ARMV6Itineraries, [ArchV6, FeatureVFP2,
+def : Processor<"mpcorenovfp",      ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"mpcore",           ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
                                                        FeatureHasSlowFPVMLx]>;
 
 // V6M Processors.
-def : Processor<"cortex-m0",        ARMV6Itineraries, [ArchV6M]>;
+def : Processor<"cortex-m0",        ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+                                                       FeatureDB]>;
 
 // V6T2 Processors.
-def : Processor<"arm1156t2-s",      ARMV6Itineraries, [ArchV6T2]>;
-def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+def : Processor<"arm1156t2-s",      ARMV6Itineraries, [HasV6T2Ops]>;
+def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
                                                        FeatureHasSlowFPVMLx]>;
 
-// V7 Processors.
+// V7a Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
-                                    [ArchV7A, ProcA8]>;
+                                    [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
+                                     FeatureDSPThumb2]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
-                                    [ArchV7A, ProcA9]>;
+                                    [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+                                     FeatureDSPThumb2]>;
 def : Processor<"cortex-a9-mp",     CortexA9Itineraries,
-                                    [ArchV7A, ProcA9, FeatureMP]>;
+                                    [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+                                     FeatureDSPThumb2, FeatureMP]>;
 
 // V7M Processors.
-def : ProcNoItin<"cortex-m3",       [ArchV7M]>;
-def : ProcNoItin<"cortex-m4",       [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
+def : ProcNoItin<"cortex-m3",       [HasV7Ops,
+                                     FeatureThumb2, FeatureNoARM, FeatureDB,
+                                     FeatureHWDiv]>;
+
+// V7EM Processors.
+def : ProcNoItin<"cortex-m4",       [HasV7Ops,
+                                     FeatureThumb2, FeatureNoARM, FeatureDB,
+                                     FeatureHWDiv, FeatureDSPThumb2,
+                                     FeatureT2XtPk, FeatureVFP2,
+                                     FeatureVFPOnlySP]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
index 618a2b5f3eac..5e438a976732 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -28,14 +28,6 @@
 using namespace llvm;
 
 namespace {
-class ARMMachObjectWriter : public MCMachObjectTargetWriter {
-public:
-  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
-                      uint32_t CPUSubtype)
-    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
-                               /*UseAggressiveSymbolFolding=*/true) {}
-};
-
 class ARMELFObjectWriter : public MCELFObjectTargetWriter {
 public:
   ARMELFObjectWriter(Triple::OSType OSType)
@@ -182,7 +174,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
     Value >>= 16;
     // Fallthrough
   case ARM::fixup_t2_movw_lo16:
-  case ARM::fixup_t2_movt_hi16_pcrel:
+  case ARM::fixup_t2_movt_hi16_pcrel:  // FIXME: Shouldn't this be shifted like
+                                       // the other hi16 fixup?
   case ARM::fixup_t2_movw_lo16_pcrel: {
     unsigned Hi4 = (Value & 0xF000) >> 12;
     unsigned i = (Value & 0x800) >> 11;
@@ -192,8 +185,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
     // inst{26} = i;
     // inst{14-12} = Mid3;
     // inst{7-0} = Lo8;
-    assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) &&
-            "Out of range pc-relative fixup value!");
+    // The value comes in as the whole thing, not just the portion required
+    // for this fixup, so we need to mask off the bits not handled by this
+    // portion (lo vs. hi).
+    Value &= 0xffff;
     Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
     uint64_t swapped = (Value & 0xFFFF0000) >> 16;
     swapped |= (Value & 0x0000FFFF) << 16;
@@ -423,12 +418,9 @@ public:
     : ARMAsmBackend(T), Subtype(st) { }
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return createMachObjectWriter(new ARMMachObjectWriter(
-                                    /*Is64Bit=*/false,
-                                    object::mach::CTM_ARM,
-                                    Subtype),
-                                  OS,
-                                  /*IsLittleEndian=*/true);
+    return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+                                     object::mach::CTM_ARM,
+                                     Subtype);
   }
 
   void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
@@ -505,7 +497,13 @@ TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
   Triple TheTriple(TT);
 
   if (TheTriple.isOSDarwin()) {
-    if (TheTriple.getArchName() == "armv6" ||
+    if (TheTriple.getArchName() == "armv4t" ||
+        TheTriple.getArchName() == "thumbv4t")
+      return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T);
+    else if (TheTriple.getArchName() == "armv5e" ||
+        TheTriple.getArchName() == "thumbv5e")
+      return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ);
+    else if (TheTriple.getArchName() == "armv6" ||
         TheTriple.getArchName() == "thumbv6")
       return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
     return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index eb7390236df3..dbc3ee41f3da 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -654,7 +654,7 @@ void ARMAsmPrinter::emitAttributes() {
   }
 
   /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
-   * since NEON can have 1 (allowed) or 2 (fused MAC operations) */
+   * since NEON can have 1 (allowed) or 2 (MAC operations) */
   if (Subtarget->hasNEON()) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
                                ARMBuildAttrs::Allowed);
@@ -1010,19 +1010,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
         MI->dump();
         assert(0 && "Unsupported opcode for unwinding information");
       case ARM::MOVr:
-      case ARM::tMOVgpr2gpr:
-      case ARM::tMOVgpr2tgpr:
         Offset = 0;
         break;
       case ARM::ADDri:
         Offset = -MI->getOperand(2).getImm();
         break;
       case ARM::SUBri:
-      case ARM::t2SUBrSPi:
-        Offset =  MI->getOperand(2).getImm();
+        Offset = MI->getOperand(2).getImm();
         break;
       case ARM::tSUBspi:
-        Offset =  MI->getOperand(2).getImm()*4;
+        Offset = MI->getOperand(2).getImm()*4;
         break;
       case ARM::tADDspi:
       case ARM::tADDrSPi:
@@ -1072,39 +1069,18 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
 
 extern cl::opt<bool> EnableARMEHABI;
 
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
 void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  unsigned Opc = MI->getOpcode();
-  switch (Opc) {
-  default: break;
-  case ARM::B: {
-    // B is just a Bcc with an 'always' predicate.
-    MCInst TmpInst;
-    LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
-    TmpInst.setOpcode(ARM::Bcc);
-    // Add predicate operands.
-    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
-  case ARM::LDMIA_RET: {
-    // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as
-    // such has additional code-gen properties and scheduling information.
-    // To emit it, we just construct as normal and set the opcode to LDMIA_UPD.
-    MCInst TmpInst;
-    LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
-    TmpInst.setOpcode(ARM::LDMIA_UPD);
-    OutStreamer.EmitInstruction(TmpInst);
+  // Do any auto-generated pseudo lowerings.
+  if (emitPseudoExpansionLowering(OutStreamer, MI))
     return;
-  }
-  case ARM::t2ADDrSPi:
-  case ARM::t2ADDrSPi12:
-  case ARM::t2SUBrSPi:
-  case ARM::t2SUBrSPi12:
-    assert ((MI->getOperand(1).getReg() == ARM::SP) &&
-            "Unexpected source register!");
-    break;
 
+  // Check for manual lowerings.
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
   case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
   case ARM::DBG_VALUE: {
     if (isVerbose() && OutStreamer.hasRawTextSupport()) {
@@ -1115,14 +1091,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
-  case ARM::tBfar: {
-    MCInst TmpInst;
-    TmpInst.setOpcode(ARM::tBL);
-    TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-          MI->getOperand(0).getMBB()->getSymbol(), OutContext)));
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
   case ARM::LEApcrel:
   case ARM::tLEApcrel:
   case ARM::t2LEApcrel: {
@@ -1153,39 +1121,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     OutStreamer.EmitInstruction(TmpInst);
     return;
   }
-  case ARM::MOVPCRX: {
-    MCInst TmpInst;
-    TmpInst.setOpcode(ARM::MOVr);
-    TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
-    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
-    // Add predicate operands.
-    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    // Add 's' bit operand (always reg0 for this)
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
   // Darwin call instructions are just normal call instructions with different
   // clobber semantics (they clobber R9).
-  case ARM::BLr9:
-  case ARM::BLr9_pred:
-  case ARM::BLXr9:
-  case ARM::BLXr9_pred: {
-    unsigned newOpc;
-    switch (Opc) {
-    default: assert(0);
-    case ARM::BLr9:       newOpc = ARM::BL; break;
-    case ARM::BLr9_pred:  newOpc = ARM::BL_pred; break;
-    case ARM::BLXr9:      newOpc = ARM::BLX; break;
-    case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break;
-    }
-    MCInst TmpInst;
-    LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
-    TmpInst.setOpcode(newOpc);
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
   case ARM::BXr9_CALL:
   case ARM::BX_CALL: {
     {
@@ -1215,6 +1152,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       TmpInst.setOpcode(ARM::tMOVr);
       TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
       TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
       OutStreamer.EmitInstruction(TmpInst);
     }
     {
@@ -1445,7 +1385,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case ARM::t2BR_JT: {
     // Lower and emit the instruction itself, then the jump table following it.
     MCInst TmpInst;
-    TmpInst.setOpcode(ARM::tMOVgpr2gpr);
+    TmpInst.setOpcode(ARM::tMOVr);
     TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
     TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
     // Add predicate operands.
@@ -1494,7 +1434,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // mov pc, target
     MCInst TmpInst;
     unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
-      ARM::MOVr : ARM::tMOVgpr2gpr;
+      ARM::MOVr : ARM::tMOVr;
     TmpInst.setOpcode(Opc);
     TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
     TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
@@ -1507,7 +1447,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     OutStreamer.EmitInstruction(TmpInst);
 
     // Make sure the Thumb jump table is 4-byte aligned.
-    if (Opc == ARM::tMOVgpr2gpr)
+    if (Opc == ARM::tMOVr)
       EmitAlignment(2);
 
     // Output the data for the jump table itself
@@ -1599,11 +1539,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     MCSymbol *Label = GetARMSJLJEHLabel();
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::tMOVgpr2tgpr);
+      TmpInst.setOpcode(ARM::tMOVr);
       TmpInst.addOperand(MCOperand::CreateReg(ValReg));
       TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
-      // 's' bit operand
-      TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+      // Predicate.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
       OutStreamer.AddComment("eh_setjmp begin");
       OutStreamer.EmitInstruction(TmpInst);
     }
@@ -1817,7 +1758,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::tMOVtgpr2gpr);
+      TmpInst.setOpcode(ARM::tMOVr);
       TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
       TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
       // Predicate.
@@ -1858,75 +1799,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
-  // Tail jump branches are really just branch instructions with additional
-  // code-gen attributes. Convert them to the canonical form here.
-  case ARM::TAILJMPd:
-  case ARM::TAILJMPdND: {
-    MCInst TmpInst, TmpInst2;
-    // Lower the instruction as-is to get the operands properly converted.
-    LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
-    TmpInst.setOpcode(ARM::Bcc);
-    TmpInst.addOperand(TmpInst2.getOperand(0));
-    // Add predicate operands.
-    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    OutStreamer.AddComment("TAILCALL");
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
-  case ARM::tTAILJMPd:
-  case ARM::tTAILJMPdND: {
-    MCInst TmpInst, TmpInst2;
-    LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
-    // The Darwin toolchain doesn't support tail call relocations of 16-bit
-    // branches.
-    TmpInst.setOpcode(Opc == ARM::tTAILJMPd ? ARM::t2B : ARM::tB);
-    TmpInst.addOperand(TmpInst2.getOperand(0));
-    OutStreamer.AddComment("TAILCALL");
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
-  case ARM::TAILJMPrND:
-  case ARM::tTAILJMPrND:
-  case ARM::TAILJMPr:
-  case ARM::tTAILJMPr: {
-    unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND)
-      ? ARM::BX : ARM::tBX;
-    MCInst TmpInst;
-    TmpInst.setOpcode(newOpc);
-    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
-    // Predicate.
-    TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
-    TmpInst.addOperand(MCOperand::CreateReg(0));
-    OutStreamer.AddComment("TAILCALL");
-    OutStreamer.EmitInstruction(TmpInst);
-    return;
-  }
-
-  // These are the pseudos created to comply with stricter operand restrictions
-  // on ARMv5. Lower them now to "normal" instructions, since all the
-  // restrictions are already satisfied.
-  case ARM::MULv5:
-    EmitPatchedInstruction(MI, ARM::MUL);
-    return;
-  case ARM::MLAv5:
-    EmitPatchedInstruction(MI, ARM::MLA);
-    return;
-  case ARM::SMULLv5:
-    EmitPatchedInstruction(MI, ARM::SMULL);
-    return;
-  case ARM::UMULLv5:
-    EmitPatchedInstruction(MI, ARM::UMULL);
-    return;
-  case ARM::SMLALv5:
-    EmitPatchedInstruction(MI, ARM::SMLAL);
-    return;
-  case ARM::UMLALv5:
-    EmitPatchedInstruction(MI, ARM::UMLAL);
-    return;
-  case ARM::UMAALv5:
-    EmitPatchedInstruction(MI, ARM::UMAAL);
-    return;
   }
 
   MCInst TmpInst;
@@ -1944,11 +1816,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 //===----------------------------------------------------------------------===//
 
 static MCInstPrinter *createARMMCInstPrinter(const Target &T,
-                                             TargetMachine &TM,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new ARMInstPrinter(TM, MAI);
+    return new ARMInstPrinter(MAI);
   return 0;
 }
 
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 5f9169ef7f77..7741fc4b34e8 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -21,6 +21,8 @@
 
 namespace llvm {
 
+class MCOperand;
+
 namespace ARM {
   enum DW_ISA {
     DW_ISA_ARM_thumb = 1,
@@ -72,6 +74,9 @@ public:
   void EmitStartOfAsmFile(Module &M);
   void EmitEndOfAsmFile(Module &M);
 
+  // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
 private:
   // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
   void emitAttributes();
@@ -84,6 +89,10 @@ private:
 
   void EmitUnwindingInstruction(const MachineInstr *MI);
 
+  // emitPseudoExpansionLowering - Auto-generated by TableGen.
+  bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+                                   const MachineInstr *MI);
+
 public:
   void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
 
@@ -100,6 +109,7 @@ public:
       llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
   }
 
+  MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
   MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
                                         const MachineBasicBlock *MBB) const;
   MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
@@ -107,7 +117,7 @@ public:
   MCSymbol *GetARMSJLJEHLabel(void) const;
 
   MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
-  
+
   /// EmitMachineConstantPoolValue - Print a machine constantpool value to
   /// the .s file.
   virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h
index 36edbad7a601..458f7dd1f784 100644
--- a/lib/Target/ARM/ARMBaseInfo.h
+++ b/lib/Target/ARM/ARMBaseInfo.h
@@ -17,20 +17,12 @@
 #ifndef ARMBASEINFO_H
 #define ARMBASEINFO_H
 
+#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "llvm/Support/ErrorHandling.h"
 
 // Note that the following auto-generated files only defined enum types, and
 // so are safe to include here.
 
-// Defines symbolic names for ARM registers.  This defines a mapping from
-// register name to register number.
-//
-#include "ARMGenRegisterNames.inc"
-
-// Defines symbolic names for the ARM instructions.
-//
-#include "ARMGenInstrNames.inc"
-
 namespace llvm {
 
 // Enums corresponding to ARM condition codes
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 44a397611526..649bd7d5ce3f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,7 +18,6 @@
 #include "ARMHazardRecognizer.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
-#include "ARMGenInstrInfo.inc"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
@@ -31,10 +30,15 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "ARMGenInstrInfo.inc"
+
 using namespace llvm;
 
 static cl::opt<bool>
@@ -74,7 +78,7 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
 };
 
 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
-  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     Subtarget(STI) {
   for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
     if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
@@ -136,9 +140,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   MachineInstr *UpdateMI = NULL;
   MachineInstr *MemMI = NULL;
   unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
-  const TargetInstrDesc &TID = MI->getDesc();
-  unsigned NumOps = TID.getNumOperands();
-  bool isLoad = !TID.mayStore();
+  const MCInstrDesc &MCID = MI->getDesc();
+  unsigned NumOps = MCID.getNumOperands();
+  bool isLoad = !MCID.mayStore();
   const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
   const MachineOperand &Base = MI->getOperand(2);
   const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -475,8 +479,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
 bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                     std::vector<MachineOperand> &Pred) const {
   // FIXME: This confuses implicit_def with optional CPSR def.
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
     return false;
 
   bool Found = false;
@@ -495,11 +499,11 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
 /// By default, this returns true for every instruction with a
 /// PredicateOperand.
 bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isPredicable())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isPredicable())
     return false;
 
-  if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+  if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
     ARMFunctionInfo *AFI =
       MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
     return AFI->isThumb2Function();
@@ -524,35 +528,23 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   const MachineFunction *MF = MBB.getParent();
   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
 
-  // Basic size info comes from the TSFlags field.
-  const TargetInstrDesc &TID = MI->getDesc();
-  uint64_t TSFlags = TID.TSFlags;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.getSize())
+    return MCID.getSize();
 
-  unsigned Opc = MI->getOpcode();
-  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
-  default: {
     // If this machine instr is an inline asm, measure it.
     if (MI->getOpcode() == ARM::INLINEASM)
       return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
     if (MI->isLabel())
       return 0;
+  unsigned Opc = MI->getOpcode();
     switch (Opc) {
-    default:
-      llvm_unreachable("Unknown or unset size field for instr!");
     case TargetOpcode::IMPLICIT_DEF:
     case TargetOpcode::KILL:
     case TargetOpcode::PROLOG_LABEL:
     case TargetOpcode::EH_LABEL:
     case TargetOpcode::DBG_VALUE:
       return 0;
-    }
-    break;
-  }
-  case ARMII::Size8Bytes: return 8;          // ARM instruction x 2.
-  case ARMII::Size4Bytes: return 4;          // ARM / Thumb2 instruction.
-  case ARMII::Size2Bytes: return 2;          // Thumb1 instruction.
-  case ARMII::SizeSpecial: {
-    switch (Opc) {
     case ARM::MOVi16_ga_pcrel:
     case ARM::MOVTi16_ga_pcrel:
     case ARM::t2MOVi16_ga_pcrel:
@@ -588,9 +580,9 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       // entry is one byte; TBH two byte each.
       unsigned EntrySize = (Opc == ARM::t2TBB_JT)
         ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
-      unsigned NumOps = TID.getNumOperands();
+      unsigned NumOps = MCID.getNumOperands();
       MachineOperand JTOP =
-        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+        MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
       unsigned JTI = JTOP.getIndex();
       const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
       assert(MJTI != 0);
@@ -616,8 +608,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       // Otherwise, pseudo-instruction sizes are zero.
       return 0;
     }
-  }
-  }
   return 0; // Not reached
 }
 
@@ -647,7 +637,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
     Opc = ARM::VMOVD;
   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVQ;
+    Opc = ARM::VORRq;
   else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
     Opc = ARM::VMOVQQ;
   else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
@@ -657,6 +647,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
   MIB.addReg(SrcReg, getKillRegState(KillSrc));
+  if (Opc == ARM::VORRq)
+    MIB.addReg(SrcReg, getKillRegState(KillSrc));
   if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
     AddDefaultPred(MIB);
 }
@@ -788,7 +780,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
     break;
   case ARM::STRi12:
   case ARM::t2STRi12:
-  case ARM::tSpill:
+  case ARM::tSTRspi:
   case ARM::VSTRD:
   case ARM::VSTRS:
     if (MI->getOperand(1).isFI() &&
@@ -923,7 +915,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
     break;
   case ARM::LDRi12:
   case ARM::t2LDRi12:
-  case ARM::tRestore:
+  case ARM::tLDRspi:
   case ARM::VLDRD:
   case ARM::VLDRS:
     if (MI->getOperand(1).isFI() &&
@@ -1269,20 +1261,20 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
   return false;
 }
 
-bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                           unsigned NumCycles,
-                                           unsigned ExtraPredCycles,
-                                           float Probability,
-                                           float Confidence) const {
+/// Return true if predicating MBB costs no more than the estimated branch.
+bool ARMBaseInstrInfo::isProfitableToIfCvt(
+    MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles,
+    const BranchProbability &Probability) const {
   if (!NumCycles)
     return false;
 
   // Attempt to estimate the relative costs of predication versus branching.
-  float UnpredCost = Probability * NumCycles;
-  UnpredCost += 1.0; // The branch itself
-  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+  // Multiply in 64 bits: numerator * cycles can overflow 32-bit arithmetic.
+  uint64_t Scaled = (uint64_t)Probability.getNumerator() * NumCycles;
+  unsigned UnpredCost = Scaled / Probability.getDenominator();
+  UnpredCost += 1 + Subtarget.getMispredictionPenalty() / 10; // 1: the branch
 
-  return (float)(NumCycles + ExtraPredCycles) < UnpredCost;
+  return (NumCycles + ExtraPredCycles) <= UnpredCost;
 }
 
 bool ARMBaseInstrInfo::
@@ -1290,16 +1282,23 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
                     unsigned TCycles, unsigned TExtra,
                     MachineBasicBlock &FMBB,
                     unsigned FCycles, unsigned FExtra,
-                    float Probability, float Confidence) const {
+                    const BranchProbability &Probability) const {
   if (!TCycles || !FCycles)
     return false;
 
   // Attempt to estimate the relative costs of predication versus branching.
-  float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
-  UnpredCost += 1.0; // The branch itself
-  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
-
-  return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
+  // Widen to 64 bits: a 32-bit numerator times a cycle count can overflow.
+  const uint64_t Num = Probability.getNumerator();
+  const uint64_t Den = Probability.getDenominator();
+
+  // Cycles of TMBB and FMBB, weighted by Probability and its complement.
+  unsigned TUnpredCost = (Num * TCycles) / Den;
+  unsigned FUnpredCost = ((Den - Num) * FCycles) / Den;
+
+  unsigned UnpredCost = TUnpredCost + FUnpredCost + 1; // +1: the branch itself
+  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -1363,7 +1362,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                 unsigned FrameReg, int &Offset,
                                 const ARMBaseInstrInfo &TII) {
   unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = MI.getDesc();
+  const MCInstrDesc &Desc = MI.getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
   bool isSub = false;
 
@@ -1803,7 +1802,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   if (!ItinData || ItinData->isEmpty())
     return 1;
 
-  const TargetInstrDesc &Desc = MI->getDesc();
+  const MCInstrDesc &Desc = MI->getDesc();
   unsigned Class = Desc.getSchedClass();
   unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
   if (UOps)
@@ -1906,10 +1905,10 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
 
 int
 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
-                                  const TargetInstrDesc &DefTID,
+                                  const MCInstrDesc &DefMCID,
                                   unsigned DefClass,
                                   unsigned DefIdx, unsigned DefAlign) const {
-  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
   if (RegNo <= 0)
     // Def is the address writeback.
     return ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1924,7 +1923,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
     DefCycle = RegNo;
     bool isSLoad = false;
 
-    switch (DefTID.getOpcode()) {
+    switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::VLDMSIA:
     case ARM::VLDMSIA_UPD:
@@ -1947,10 +1946,10 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
 
 int
 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
-                                 const TargetInstrDesc &DefTID,
+                                 const MCInstrDesc &DefMCID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
-  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
   if (RegNo <= 0)
     // Def is the address writeback.
     return ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1982,10 +1981,10 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
 
 int
 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
-                                  const TargetInstrDesc &UseTID,
+                                  const MCInstrDesc &UseMCID,
                                   unsigned UseClass,
                                   unsigned UseIdx, unsigned UseAlign) const {
-  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
   if (RegNo <= 0)
     return ItinData->getOperandCycle(UseClass, UseIdx);
 
@@ -1999,7 +1998,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
     UseCycle = RegNo;
     bool isSStore = false;
 
-    switch (UseTID.getOpcode()) {
+    switch (UseMCID.getOpcode()) {
     default: break;
     case ARM::VSTMSIA:
     case ARM::VSTMSIA_UPD:
@@ -2022,10 +2021,10 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
 
 int
 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
-                                 const TargetInstrDesc &UseTID,
+                                 const MCInstrDesc &UseMCID,
                                  unsigned UseClass,
                                  unsigned UseIdx, unsigned UseAlign) const {
-  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
   if (RegNo <= 0)
     return ItinData->getOperandCycle(UseClass, UseIdx);
 
@@ -2051,14 +2050,14 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
 
 int
 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
-                                    const TargetInstrDesc &DefTID,
+                                    const MCInstrDesc &DefMCID,
                                     unsigned DefIdx, unsigned DefAlign,
-                                    const TargetInstrDesc &UseTID,
+                                    const MCInstrDesc &UseMCID,
                                     unsigned UseIdx, unsigned UseAlign) const {
-  unsigned DefClass = DefTID.getSchedClass();
-  unsigned UseClass = UseTID.getSchedClass();
+  unsigned DefClass = DefMCID.getSchedClass();
+  unsigned UseClass = UseMCID.getSchedClass();
 
-  if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
 
   // This may be a def / use of a variable_ops instruction, the operand
@@ -2066,7 +2065,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   // figure it out.
   int DefCycle = -1;
   bool LdmBypass = false;
-  switch (DefTID.getOpcode()) {
+  switch (DefMCID.getOpcode()) {
   default:
     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
     break;
@@ -2077,7 +2076,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   case ARM::VLDMSIA:
   case ARM::VLDMSIA_UPD:
   case ARM::VLDMSDB_UPD:
-    DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
     break;
 
   case ARM::LDMIA_RET:
@@ -2098,7 +2097,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   case ARM::t2LDMIA_UPD:
   case ARM::t2LDMDB_UPD:
     LdmBypass = 1;
-    DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
     break;
   }
 
@@ -2107,7 +2106,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     DefCycle = 2;
 
   int UseCycle = -1;
-  switch (UseTID.getOpcode()) {
+  switch (UseMCID.getOpcode()) {
   default:
     UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
     break;
@@ -2118,7 +2117,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   case ARM::VSTMSIA:
   case ARM::VSTMSIA_UPD:
   case ARM::VSTMSDB_UPD:
-    UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
     break;
 
   case ARM::STMIA:
@@ -2137,7 +2136,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   case ARM::t2STMDB:
   case ARM::t2STMIA_UPD:
   case ARM::t2STMDB_UPD:
-    UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
     break;
   }
 
@@ -2150,7 +2149,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     if (LdmBypass) {
       // It's a variable_ops instruction so we can't use DefIdx here. Just use
       // first def operand.
-      if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
                                           UseClass, UseIdx))
         --UseCycle;
     } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
@@ -2170,11 +2169,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
       DefMI->isRegSequence() || DefMI->isImplicitDef())
     return 1;
 
-  const TargetInstrDesc &DefTID = DefMI->getDesc();
+  const MCInstrDesc &DefMCID = DefMI->getDesc();
   if (!ItinData || ItinData->isEmpty())
-    return DefTID.mayLoad() ? 3 : 1;
+    return DefMCID.mayLoad() ? 3 : 1;
 
-  const TargetInstrDesc &UseTID = UseMI->getDesc();
+  const MCInstrDesc &UseMCID = UseMI->getDesc();
   const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
   if (DefMO.getReg() == ARM::CPSR) {
     if (DefMI->getOpcode() == ARM::FMSTAT) {
@@ -2183,7 +2182,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     }
 
     // CPSR set and branch can be paired in the same cycle.
-    if (UseTID.isBranch())
+    if (UseMCID.isBranch())
       return 0;
   }
 
@@ -2191,14 +2190,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     ? (*DefMI->memoperands_begin())->getAlignment() : 0;
   unsigned UseAlign = UseMI->hasOneMemOperand()
     ? (*UseMI->memoperands_begin())->getAlignment() : 0;
-  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
-                                  UseTID, UseIdx, UseAlign);
+  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+                                  UseMCID, UseIdx, UseAlign);
 
   if (Latency > 1 &&
       (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
-    switch (DefTID.getOpcode()) {
+    switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::LDRrs:
     case ARM::LDRBrs: {
@@ -2223,7 +2222,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   }
 
   if (DefAlign < 8 && Subtarget.isCortexA9())
-    switch (DefTID.getOpcode()) {
+    switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::VLD1q8:
     case ARM::VLD1q16:
@@ -2327,37 +2326,37 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   if (!DefNode->isMachineOpcode())
     return 1;
 
-  const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
 
-  if (isZeroCost(DefTID.Opcode))
+  if (isZeroCost(DefMCID.Opcode))
     return 0;
 
   if (!ItinData || ItinData->isEmpty())
-    return DefTID.mayLoad() ? 3 : 1;
+    return DefMCID.mayLoad() ? 3 : 1;
 
   if (!UseNode->isMachineOpcode()) {
-    int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
     if (Subtarget.isCortexA9())
       return Latency <= 2 ? 1 : Latency - 1;
     else
       return Latency <= 3 ? 1 : Latency - 2;
   }
 
-  const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
   const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
   unsigned DefAlign = !DefMN->memoperands_empty()
     ? (*DefMN->memoperands_begin())->getAlignment() : 0;
   const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
   unsigned UseAlign = !UseMN->memoperands_empty()
     ? (*UseMN->memoperands_begin())->getAlignment() : 0;
-  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
-                                  UseTID, UseIdx, UseAlign);
+  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+                                  UseMCID, UseIdx, UseAlign);
 
   if (Latency > 1 &&
       (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
-    switch (DefTID.getOpcode()) {
+    switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::LDRrs:
     case ARM::LDRBrs: {
@@ -2384,7 +2383,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   }
 
   if (DefAlign < 8 && Subtarget.isCortexA9())
-    switch (DefTID.getOpcode()) {
+    switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::VLD1q8Pseudo:
     case ARM::VLD1q16Pseudo:
@@ -2503,10 +2502,10 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   if (!ItinData || ItinData->isEmpty())
     return 1;
 
-  const TargetInstrDesc &TID = MI->getDesc();
-  unsigned Class = TID.getSchedClass();
+  const MCInstrDesc &MCID = MI->getDesc();
+  unsigned Class = MCID.getSchedClass();
   unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
-  if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+  if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
     // When predicated, CPSR is an additional source operand for CPSR updating
     // instructions, this apparently increases their latencies.
     *PredCost = 1;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 9a2faf8f9aae..507e8974bf7b 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -20,6 +20,9 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
 namespace llvm {
   class ARMSubtarget;
   class ARMBaseRegisterInfo;
@@ -36,24 +39,16 @@ namespace ARMII {
     // This four-bit field describes the addressing mode used.
     AddrModeMask  = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
 
-    // Size* - Flags to keep track of the size of an instruction.
-    SizeShift     = 5,
-    SizeMask      = 7 << SizeShift,
-    SizeSpecial   = 1,   // 0 byte pseudo or special case.
-    Size8Bytes    = 2,
-    Size4Bytes    = 3,
-    Size2Bytes    = 4,
-
     // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
     // and store ops only.  Generic "updating" flag is used for ld/st multiple.
     // The index mode enums are declared in ARMBaseInfo.h
-    IndexModeShift = 8,
+    IndexModeShift = 5,
     IndexModeMask  = 3 << IndexModeShift,
 
     //===------------------------------------------------------------------===//
     // Instruction encoding formats.
     //
-    FormShift     = 10,
+    FormShift     = 7,
     FormMask      = 0x3f << FormShift,
 
     // Pseudo instructions
@@ -126,15 +121,15 @@ namespace ARMII {
 
     // UnaryDP - Indicates this is a unary data processing instruction, i.e.
     // it doesn't have a Rn operand.
-    UnaryDP       = 1 << 16,
+    UnaryDP       = 1 << 13,
 
     // Xform16Bit - Indicates this Thumb2 instruction may be transformed into
     // a 16-bit Thumb instruction if certain conditions are met.
-    Xform16Bit    = 1 << 17,
+    Xform16Bit    = 1 << 14,
 
     //===------------------------------------------------------------------===//
     // Code domain.
-    DomainShift   = 18,
+    DomainShift   = 15,
     DomainMask    = 7 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
@@ -172,7 +167,7 @@ namespace ARMII {
   };
 }
 
-class ARMBaseInstrInfo : public TargetInstrInfoImpl {
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
   const ARMSubtarget &Subtarget;
 
 protected:
@@ -291,8 +286,8 @@ public:
                                        int64_t &Offset1, int64_t &Offset2)const;
 
   /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
-  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
-  /// be scheduled togther. On some targets if two loads are loading from
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+  /// should be scheduled togther. On some targets if two loads are loading from
   /// addresses in the same cache line, it's better if they are scheduled
   /// together. This function takes two integers that represent the load offsets
   /// from the common base address. It returns true if it decides it's desirable
@@ -308,18 +303,18 @@ public:
 
   virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
                                    unsigned NumCycles, unsigned ExtraPredCycles,
-                                   float Prob, float Confidence) const;
+                                   const BranchProbability &Probability) const;
 
   virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                    unsigned NumT, unsigned ExtraT,
                                    MachineBasicBlock &FMBB,
                                    unsigned NumF, unsigned ExtraF,
-                                   float Probability, float Confidence) const;
+                                   const BranchProbability &Probability) const;
 
   virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                          unsigned NumCycles,
-                                         float Probability,
-                                         float Confidence) const {
+                                         const BranchProbability
+                                           &Probability) const {
     return NumCycles == 1;
   }
 
@@ -353,25 +348,25 @@ public:
                         SDNode *UseNode, unsigned UseIdx) const;
 private:
   int getVLDMDefCycle(const InstrItineraryData *ItinData,
-                      const TargetInstrDesc &DefTID,
+                      const MCInstrDesc &DefMCID,
                       unsigned DefClass,
                       unsigned DefIdx, unsigned DefAlign) const;
   int getLDMDefCycle(const InstrItineraryData *ItinData,
-                     const TargetInstrDesc &DefTID,
+                     const MCInstrDesc &DefMCID,
                      unsigned DefClass,
                      unsigned DefIdx, unsigned DefAlign) const;
   int getVSTMUseCycle(const InstrItineraryData *ItinData,
-                      const TargetInstrDesc &UseTID,
+                      const MCInstrDesc &UseMCID,
                       unsigned UseClass,
                       unsigned UseIdx, unsigned UseAlign) const;
   int getSTMUseCycle(const InstrItineraryData *ItinData,
-                     const TargetInstrDesc &UseTID,
+                     const MCInstrDesc &UseMCID,
                      unsigned UseClass,
                      unsigned UseIdx, unsigned UseAlign) const;
   int getOperandLatency(const InstrItineraryData *ItinData,
-                        const TargetInstrDesc &DefTID,
+                        const MCInstrDesc &DefMCID,
                         unsigned DefIdx, unsigned DefAlign,
-                        const TargetInstrDesc &UseTID,
+                        const MCInstrDesc &UseMCID,
                         unsigned UseIdx, unsigned UseAlign) const;
 
   int getInstrLatency(const InstrItineraryData *ItinData,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 4ab37f6cc759..ba422952ac1a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -40,6 +40,9 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CommandLine.h"
 
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
 using namespace llvm;
 
 static cl::opt<bool>
@@ -54,8 +57,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
 
 ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
                                          const ARMSubtarget &sti)
-  : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
-    TII(tii), STI(sti),
+  : ARMGenRegisterInfo(), TII(tii), STI(sti),
     FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
     BasePtr(ARM::R6) {
 }
@@ -100,6 +102,12 @@ getReservedRegs(const MachineFunction &MF) const {
   // Some targets reserve R9.
   if (STI.isR9Reserved())
     Reserved.set(ARM::R9);
+  // Reserve D16-D31 if the subtarget doesn't support them.
+  if (!STI.hasVFP3() || STI.hasD16()) {
+    assert(ARM::D31 == ARM::D16 + 15);
+    for (unsigned i = 0; i != 16; ++i)
+      Reserved.set(ARM::D16 + i);
+  }
   return Reserved;
 }
 
@@ -387,12 +395,12 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   }
 }
 
-/// getAllocationOrder - Returns the register allocation order for a specified
-/// register class in the form of a pair of TargetRegisterClass iterators.
-std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
-ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
-                                        unsigned HintType, unsigned HintReg,
-                                        const MachineFunction &MF) const {
+/// getRawAllocationOrder - Returns the register allocation order for a
+/// specified register class with a target-dependent hint.
+ArrayRef<unsigned>
+ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
+                                           unsigned HintType, unsigned HintReg,
+                                           const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   // Alternative register allocation orders when favoring even / odd registers
   // of register pairs.
@@ -469,70 +477,54 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
 
   // We only support even/odd hints for GPR and rGPR.
   if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
-    return std::make_pair(RC->allocation_order_begin(MF),
-                          RC->allocation_order_end(MF));
+    return RC->getRawAllocationOrder(MF);
 
   if (HintType == ARMRI::RegPairEven) {
     if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
       // It's no longer possible to fulfill this hint. Return the default
       // allocation order.
-      return std::make_pair(RC->allocation_order_begin(MF),
-                            RC->allocation_order_end(MF));
+      return RC->getRawAllocationOrder(MF);
 
     if (!TFI->hasFP(MF)) {
       if (!STI.isR9Reserved())
-        return std::make_pair(GPREven1,
-                              GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPREven1);
       else
-        return std::make_pair(GPREven4,
-                              GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPREven4);
     } else if (FramePtr == ARM::R7) {
       if (!STI.isR9Reserved())
-        return std::make_pair(GPREven2,
-                              GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPREven2);
       else
-        return std::make_pair(GPREven5,
-                              GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPREven5);
     } else { // FramePtr == ARM::R11
       if (!STI.isR9Reserved())
-        return std::make_pair(GPREven3,
-                              GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPREven3);
       else
-        return std::make_pair(GPREven6,
-                              GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPREven6);
     }
   } else if (HintType == ARMRI::RegPairOdd) {
     if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
       // It's no longer possible to fulfill this hint. Return the default
       // allocation order.
-      return std::make_pair(RC->allocation_order_begin(MF),
-                            RC->allocation_order_end(MF));
+      return RC->getRawAllocationOrder(MF);
 
     if (!TFI->hasFP(MF)) {
       if (!STI.isR9Reserved())
-        return std::make_pair(GPROdd1,
-                              GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPROdd1);
       else
-        return std::make_pair(GPROdd4,
-                              GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPROdd4);
     } else if (FramePtr == ARM::R7) {
       if (!STI.isR9Reserved())
-        return std::make_pair(GPROdd2,
-                              GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPROdd2);
       else
-        return std::make_pair(GPROdd5,
-                              GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPROdd5);
     } else { // FramePtr == ARM::R11
       if (!STI.isR9Reserved())
-        return std::make_pair(GPROdd3,
-                              GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPROdd3);
       else
-        return std::make_pair(GPROdd6,
-                              GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+        return ArrayRef<unsigned>(GPROdd6);
     }
   }
-  return std::make_pair(RC->allocation_order_begin(MF),
-                        RC->allocation_order_end(MF));
+  return RC->getRawAllocationOrder(MF);
 }
 
 /// ResolveRegAllocHint - Resolves the specified register allocation hint
@@ -965,7 +957,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
 int64_t ARMBaseRegisterInfo::
 getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
-  const TargetInstrDesc &Desc = MI->getDesc();
+  const MCInstrDesc &Desc = MI->getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
   int64_t InstrOffs = 0;;
   int Scale = 1;
@@ -1115,11 +1107,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
   if (Ins != MBB->end())
     DL = Ins->getDebugLoc();
 
-  const TargetInstrDesc &TID = TII.get(ADDriOpc);
+  const MCInstrDesc &MCID = TII.get(ADDriOpc);
   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  MRI.constrainRegClass(BaseReg, TID.OpInfo[0].getRegClass(this));
+  MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
 
-  MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, TID, BaseReg)
+  MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
     .addFrameIndex(FrameIdx).addImm(Offset);
 
   if (!AFI->isThumb1OnlyFunction())
@@ -1155,7 +1147,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
 
 bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                              int64_t Offset) const {
-  const TargetInstrDesc &Desc = MI->getDesc();
+  const MCInstrDesc &Desc = MI->getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
   unsigned i = 0;
 
@@ -1291,11 +1283,5 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     }
     // Update the original instruction to use the scratch register.
     MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
-    if (MI.getOpcode() == ARM::t2ADDrSPi)
-      MI.setDesc(TII.get(ARM::t2ADDri));
-    else if (MI.getOpcode() == ARM::t2SUBrSPi)
-      MI.setDesc(TII.get(ARM::t2SUBri));
   }
 }
-
-#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index c60d75a6893d..b4b4059e7361 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -16,7 +16,9 @@
 
 #include "ARM.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "ARMGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
 
 namespace llvm {
   class ARMSubtarget;
@@ -134,10 +136,9 @@ public:
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const;
 
-  std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
-  getAllocationOrder(const TargetRegisterClass *RC,
-                     unsigned HintType, unsigned HintReg,
-                     const MachineFunction &MF) const;
+  ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+                                           unsigned HintType, unsigned HintReg,
+                                           const MachineFunction &MF) const;
 
   unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
                                const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 16d4ca599a06..d6fca6277501 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -96,13 +96,13 @@ namespace {
     void addPCLabel(unsigned LabelID);
     void emitPseudoInstruction(const MachineInstr &MI);
     unsigned getMachineSoRegOpValue(const MachineInstr &MI,
-                                    const TargetInstrDesc &TID,
+                                    const MCInstrDesc &MCID,
                                     const MachineOperand &MO,
                                     unsigned OpIdx);
 
     unsigned getMachineSoImmOpValue(unsigned SoImm);
     unsigned getAddrModeSBit(const MachineInstr &MI,
-                             const TargetInstrDesc &TID) const;
+                             const MCInstrDesc &MCID) const;
 
     void emitDataProcessingInstruction(const MachineInstr &MI,
                                        unsigned ImplicitRd = 0,
@@ -443,9 +443,9 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
   else if (MO.isSymbol())
     emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
   else if (MO.isCPI()) {
-    const TargetInstrDesc &TID = MI.getDesc();
+    const MCInstrDesc &MCID = MI.getDesc();
     // For VFP load, the immediate offset is multiplied by 4.
-    unsigned Reloc =  ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+    unsigned Reloc =  ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
       ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
     emitConstPoolAddress(MO.getIndex(), Reloc);
   } else if (MO.isJTI())
@@ -757,7 +757,7 @@ void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
 void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
   // It's basically add r, pc, (LJTI - $+8)
 
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Emit the 'add' instruction.
   unsigned Binary = 0x4 << 21;  // add: Insts{24-21} = 0b0100
@@ -766,7 +766,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
   // Encode S bit if MI modifies CPSR.
-  Binary |= getAddrModeSBit(MI, TID);
+  Binary |= getAddrModeSBit(MI, MCID);
 
   // Encode Rd.
   Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
@@ -912,7 +912,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
 }
 
 unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
-                                                const TargetInstrDesc &TID,
+                                                const MCInstrDesc &MCID,
                                                 const MachineOperand &MO,
                                                 unsigned OpIdx) {
   unsigned Binary = getMachineOpValue(MI, MO);
@@ -982,8 +982,8 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
 }
 
 unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
-                                         const TargetInstrDesc &TID) const {
-  for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){
+                                         const MCInstrDesc &MCID) const {
+  for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){
     const MachineOperand &MO = MI.getOperand(i-1);
     if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
       return 1 << ARMII::S_BitShift;
@@ -994,7 +994,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
 void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
                                                    unsigned ImplicitRd,
                                                    unsigned ImplicitRn) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1003,10 +1003,10 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
   // Encode S bit if MI modifies CPSR.
-  Binary |= getAddrModeSBit(MI, TID);
+  Binary |= getAddrModeSBit(MI, MCID);
 
   // Encode register def if there is one.
-  unsigned NumDefs = TID.getNumDefs();
+  unsigned NumDefs = MCID.getNumDefs();
   unsigned OpIdx = 0;
   if (NumDefs)
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
@@ -1014,7 +1014,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
     // Special handling for implicit use (e.g. PC).
     Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
 
-  if (TID.Opcode == ARM::MOVi16) {
+  if (MCID.Opcode == ARM::MOVi16) {
       // Get immediate from MI.
       unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
                       ARM::reloc_arm_movw);
@@ -1023,14 +1023,14 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
       Binary |= ((Lo16 >> 12) & 0xF) << 16;
       emitWordLE(Binary);
       return;
-  } else if(TID.Opcode == ARM::MOVTi16) {
+  } else if(MCID.Opcode == ARM::MOVTi16) {
       unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
                        ARM::reloc_arm_movt) >> 16);
       Binary |= Hi16 & 0xFFF;
       Binary |= ((Hi16 >> 12) & 0xF) << 16;
       emitWordLE(Binary);
       return;
-  } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+  } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
       uint32_t v = ~MI.getOperand(2).getImm();
       int32_t lsb = CountTrailingZeros_32(v);
       int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
@@ -1039,7 +1039,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
       Binary |= (lsb & 0x1F) << 7;
       emitWordLE(Binary);
       return;
-  } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+  } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
       // Encode Rn in Instr{0-3}
       Binary |= getMachineOpValue(MI, OpIdx++);
 
@@ -1054,11 +1054,11 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
   }
 
   // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
 
   // Encode first non-shifter register operand if there is one.
-  bool isUnary = TID.TSFlags & ARMII::UnaryDP;
+  bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
   if (!isUnary) {
     if (ImplicitRn)
       // Special handling for implicit use (e.g. PC).
@@ -1071,9 +1071,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
 
   // Encode shifter operand.
   const MachineOperand &MO = MI.getOperand(OpIdx);
-  if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+  if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
     // Encode SoReg.
-    emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx));
+    emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
     return;
   }
 
@@ -1092,9 +1092,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
 void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
                                               unsigned ImplicitRd,
                                               unsigned ImplicitRn) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  unsigned Form = TID.TSFlags & ARMII::FormMask;
-  bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+  const MCInstrDesc &MCID = MI.getDesc();
+  unsigned Form = MCID.TSFlags & ARMII::FormMask;
+  bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1134,7 +1134,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
   // If this is a two-address operand, skip it. e.g. LDR_PRE.
-  if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
 
   const MachineOperand &MO2 = MI.getOperand(OpIdx);
@@ -1170,9 +1170,9 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
 
 void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
                                                   unsigned ImplicitRn) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  unsigned Form = TID.TSFlags & ARMII::FormMask;
-  bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+  const MCInstrDesc &MCID = MI.getDesc();
+  unsigned Form = MCID.TSFlags & ARMII::FormMask;
+  bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1194,7 +1194,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
   Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
 
   // Skip LDRD and STRD's second operand.
-  if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+  if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
     ++OpIdx;
 
   // Set second operand
@@ -1205,7 +1205,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
     Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
   // If this is a two-address operand, skip it. e.g. LDRH_POST.
-  if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
 
   const MachineOperand &MO2 = MI.getOperand(OpIdx);
@@ -1255,8 +1255,8 @@ static unsigned getAddrModeUPBits(unsigned Mode) {
 }
 
 void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+  const MCInstrDesc &MCID = MI.getDesc();
+  bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1295,7 +1295,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
 }
 
 void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1304,12 +1304,12 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
   // Encode S bit if MI modifies CPSR.
-  Binary |= getAddrModeSBit(MI, TID);
+  Binary |= getAddrModeSBit(MI, MCID);
 
   // 32x32->64bit operations have two destination registers. The number
   // of register definitions will tell us if that's what we're dealing with.
   unsigned OpIdx = 0;
-  if (TID.getNumDefs() == 2)
+  if (MCID.getNumDefs() == 2)
     Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
 
   // Encode Rd
@@ -1323,16 +1323,16 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
 
   // Many multiple instructions (e.g. MLA) have three src operands. Encode
   // it as Rn (for multiply, that's in the same offset as RdLo.
-  if (TID.getNumOperands() > OpIdx &&
-      !TID.OpInfo[OpIdx].isPredicate() &&
-      !TID.OpInfo[OpIdx].isOptionalDef())
+  if (MCID.getNumOperands() > OpIdx &&
+      !MCID.OpInfo[OpIdx].isPredicate() &&
+      !MCID.OpInfo[OpIdx].isOptionalDef())
     Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
 
   emitWordLE(Binary);
 }
 
 void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1361,15 +1361,15 @@ void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
 
   // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
   if (MI.getOperand(OpIdx).isImm() &&
-      !TID.OpInfo[OpIdx].isPredicate() &&
-      !TID.OpInfo[OpIdx].isOptionalDef())
+      !MCID.OpInfo[OpIdx].isPredicate() &&
+      !MCID.OpInfo[OpIdx].isOptionalDef())
     Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
 
   emitWordLE(Binary);
 }
 
 void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1378,7 +1378,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
   // PKH instructions are finished at this point
-  if (TID.Opcode == ARM::PKHBT || TID.Opcode == ARM::PKHTB) {
+  if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
     emitWordLE(Binary);
     return;
   }
@@ -1389,9 +1389,9 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
 
   const MachineOperand &MO = MI.getOperand(OpIdx++);
-  if (OpIdx == TID.getNumOperands() ||
-      TID.OpInfo[OpIdx].isPredicate() ||
-      TID.OpInfo[OpIdx].isOptionalDef()) {
+  if (OpIdx == MCID.getNumOperands() ||
+      MCID.OpInfo[OpIdx].isPredicate() ||
+      MCID.OpInfo[OpIdx].isOptionalDef()) {
     // Encode Rm and it's done.
     Binary |= getMachineOpValue(MI, MO);
     emitWordLE(Binary);
@@ -1406,7 +1406,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
 
   // Encode shift_imm.
   unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
-  if (TID.Opcode == ARM::PKHTB) {
+  if (MCID.Opcode == ARM::PKHTB) {
     assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
     if (ShiftAmt == 32)
       ShiftAmt = 0;
@@ -1418,7 +1418,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
 }
 
 void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Part of binary is determined by TableGen.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1431,11 +1431,11 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
 
   // Encode saturate bit position.
   unsigned Pos = MI.getOperand(1).getImm();
-  if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+  if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
     Pos -= 1;
   assert((Pos < 16 || (Pos < 32 &&
-                       TID.Opcode != ARM::SSAT16 &&
-                       TID.Opcode != ARM::USAT16)) &&
+                       MCID.Opcode != ARM::SSAT16 &&
+                       MCID.Opcode != ARM::USAT16)) &&
          "saturate bit position out of range");
   Binary |= Pos << 16;
 
@@ -1443,7 +1443,7 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
   Binary |= getMachineOpValue(MI, 2);
 
   // Encode shift_imm.
-  if (TID.getNumOperands() == 4) {
+  if (MCID.getNumOperands() == 4) {
     unsigned ShiftOp = MI.getOperand(3).getImm();
     ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
     if (Opc == ARM_AM::asr)
@@ -1459,9 +1459,9 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
 }
 
 void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
-  if (TID.Opcode == ARM::TPsoft) {
+  if (MCID.Opcode == ARM::TPsoft) {
     llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
   }
 
@@ -1498,20 +1498,20 @@ void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
 }
 
 void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Handle jump tables.
-  if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+  if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
     // First emit a ldr pc, [] instruction.
     emitDataProcessingInstruction(MI, ARM::PC);
 
     // Then emit the inline jump table.
     unsigned JTIndex =
-      (TID.Opcode == ARM::BR_JTr)
+      (MCID.Opcode == ARM::BR_JTr)
       ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
     emitInlineJumpTable(JTIndex);
     return;
-  } else if (TID.Opcode == ARM::BR_JTm) {
+  } else if (MCID.Opcode == ARM::BR_JTm) {
     // First emit a ldr pc, [] instruction.
     emitLoadStoreInstruction(MI, ARM::PC);
 
@@ -1526,7 +1526,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
-  if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
+  if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
     // The return register is LR.
     Binary |= getARMRegisterNumbering(ARM::LR);
   else
@@ -1579,7 +1579,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
 }
 
 void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1596,16 +1596,16 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
   Binary |= encodeVFPRd(MI, OpIdx++);
 
   // If this is a two-address operand, skip it, e.g. FMACD.
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
 
   // Encode Dn / Sn.
-  if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+  if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
     Binary |= encodeVFPRn(MI, OpIdx++);
 
-  if (OpIdx == TID.getNumOperands() ||
-      TID.OpInfo[OpIdx].isPredicate() ||
-      TID.OpInfo[OpIdx].isOptionalDef()) {
+  if (OpIdx == MCID.getNumOperands() ||
+      MCID.OpInfo[OpIdx].isPredicate() ||
+      MCID.OpInfo[OpIdx].isOptionalDef()) {
     // FCMPEZD etc. has only one operand.
     emitWordLE(Binary);
     return;
@@ -1618,8 +1618,8 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
 }
 
 void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  unsigned Form = TID.TSFlags & ARMII::FormMask;
+  const MCInstrDesc &MCID = MI.getDesc();
+  unsigned Form = MCID.TSFlags & ARMII::FormMask;
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1709,8 +1709,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
 
 void
 ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
-  bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+  const MCInstrDesc &MCID = MI.getDesc();
+  bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
 
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1795,8 +1795,8 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
   unsigned Binary = getBinaryCodeForInstr(MI);
 
   unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
-  const TargetInstrDesc &TID = MI.getDesc();
-  if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+  const MCInstrDesc &MCID = MI.getDesc();
+  if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
     RegTOpIdx = 0;
     RegNOpIdx = 1;
     LnOpIdx = 2;
@@ -1863,12 +1863,12 @@ void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
 }
 
 void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
   unsigned Binary = getBinaryCodeForInstr(MI);
   // Destination register is encoded in Dd; source register in Dm.
   unsigned OpIdx = 0;
   Binary |= encodeNEONRd(MI, OpIdx++);
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
   Binary |= encodeNEONRm(MI, OpIdx);
   if (IsThumb)
@@ -1878,15 +1878,15 @@ void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
 }
 
 void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
-  const TargetInstrDesc &TID = MI.getDesc();
+  const MCInstrDesc &MCID = MI.getDesc();
   unsigned Binary = getBinaryCodeForInstr(MI);
   // Destination register is encoded in Dd; source registers in Dn and Dm.
   unsigned OpIdx = 0;
   Binary |= encodeNEONRd(MI, OpIdx++);
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
   Binary |= encodeNEONRn(MI, OpIdx++);
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
     ++OpIdx;
   Binary |= encodeNEONRm(MI, OpIdx);
   if (IsThumb)
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index baf95a33dd4b..f45ebdc53500 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1538,7 +1538,10 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
     if (MI->getOpcode() == ARM::tPOP_RET &&
         MI->getOperand(2).getReg() == ARM::PC &&
         MI->getNumExplicitOperands() == 3) {
-      BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
+      // Create the new insn and copy the predicate from the old.
+      BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+        .addOperand(MI->getOperand(0))
+        .addOperand(MI->getOperand(1));
       MI->eraseFromParent();
       MadeChange = true;
     }
@@ -1692,9 +1695,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
     MachineInstr *MI = T2JumpTables[i];
-    const TargetInstrDesc &TID = MI->getDesc();
-    unsigned NumOps = TID.getNumOperands();
-    unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+    const MCInstrDesc &MCID = MI->getDesc();
+    unsigned NumOps = MCID.getNumOperands();
+    unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
     MachineOperand JTOP = MI->getOperand(JTOpIdx);
     unsigned JTI = JTOP.getIndex();
     assert(JTI < JT.size());
@@ -1815,9 +1818,9 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
     MachineInstr *MI = T2JumpTables[i];
-    const TargetInstrDesc &TID = MI->getDesc();
-    unsigned NumOps = TID.getNumOperands();
-    unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+    const MCInstrDesc &MCID = MI->getDesc();
+    unsigned NumOps = MCID.getNumOperands();
+    unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
     MachineOperand JTOP = MI->getOperand(JTOpIdx);
     unsigned JTI = JTOP.getIndex();
     assert(JTI < JT.size());
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index b6b3c75943b5..94b72fdb9a7e 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -68,7 +68,7 @@ namespace {
 void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
                                      MachineInstrBuilder &UseMI,
                                      MachineInstrBuilder &DefMI) {
-  const TargetInstrDesc &Desc = OldMI.getDesc();
+  const MCInstrDesc &Desc = OldMI.getDesc();
   for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
        i != e; ++i) {
     const MachineOperand &MO = OldMI.getOperand(i);
@@ -727,8 +727,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       MI.eraseFromParent();
       return true;
     }
+    case ARM::t2MOVCCr:
     case ARM::MOVCCr: {
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr),
+      unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
               MI.getOperand(1).getReg())
         .addReg(MI.getOperand(2).getReg(),
                 getKillRegState(MI.getOperand(2).isKill()))
@@ -764,8 +766,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       MI.eraseFromParent();
       return true;
     }
+    case ARM::t2MOVCCi:
     case ARM::MOVCCi: {
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi),
+      unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
               MI.getOperand(1).getReg())
         .addImm(MI.getOperand(2).getImm())
         .addImm(MI.getOperand(3).getImm()) // 'pred'
@@ -837,8 +841,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                              MI.getOperand(0).getReg())
                      .addOperand(MI.getOperand(1))
                      .addReg(0)
-                     .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
-                                                  : ARM_AM::asr), 1)))
+                     .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+                                                  ARM_AM::lsr : ARM_AM::asr),
+                                                 1)))
         .addReg(ARM::CPSR, RegState::Define);
       MI.eraseFromParent();
       return true;
@@ -856,10 +861,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       MI.eraseFromParent();
       return true;
     }
+    case ARM::tTPsoft:
     case ARM::TPsoft: {
       MachineInstrBuilder MIB =
         BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                TII->get(ARM::BL))
+                TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
         .addExternalSymbol("__aeabi_read_tp", 0);
 
       MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -900,10 +906,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       const MachineOperand &MO1 = MI.getOperand(1);
       const GlobalValue *GV = MO1.getGlobal();
       unsigned TF = MO1.getTargetFlags();
-      bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn);
+      bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
       bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
       unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
-      unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
+      unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
       unsigned LO16TF = isPIC
         ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
       unsigned HI16TF = isPIC
@@ -958,15 +964,17 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       unsigned OddSrc  = TRI->getSubReg(SrcReg, ARM::qsub_1);
       MachineInstrBuilder Even =
         AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                               TII->get(ARM::VMOVQ))
+                               TII->get(ARM::VORRq))
                        .addReg(EvenDst,
                                RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(EvenSrc, getKillRegState(SrcIsKill))
                        .addReg(EvenSrc, getKillRegState(SrcIsKill)));
       MachineInstrBuilder Odd =
         AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                               TII->get(ARM::VMOVQ))
+                               TII->get(ARM::VORRq))
                        .addReg(OddDst,
                                RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(OddSrc, getKillRegState(SrcIsKill))
                        .addReg(OddSrc, getKillRegState(SrcIsKill)));
       TransferImpOps(MI, Even, Odd);
       MI.eraseFromParent();
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 5cf73c479a33..f469d7efe11a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -219,8 +219,8 @@ class ARMFastISel : public FastISel {
 // we don't care about implicit defs here, just places we'll need to add a
 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.hasOptionalDef())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.hasOptionalDef())
     return false;
 
   // Look to see if our OptionalDef is defining CPSR or CCR.
@@ -234,15 +234,15 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
 }
 
 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
-  const TargetInstrDesc &TID = MI->getDesc();
+  const MCInstrDesc &MCID = MI->getDesc();
 
   // If we're a thumb2 or not NEON function we were handled via isPredicable.
-  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
        AFI->isThumb2Function())
     return false;
 
-  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
-    if (TID.OpInfo[i].isPredicate())
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+    if (MCID.OpInfo[i].isPredicate())
       return true;
 
   return false;
@@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                     const TargetRegisterClass* RC) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
   return ResultReg;
@@ -288,7 +288,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -308,7 +308,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -331,7 +331,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                        unsigned Op1, bool Op1IsKill,
                                        unsigned Op2, bool Op2IsKill) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -355,7 +355,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                       unsigned Op0, bool Op0IsKill,
                                       uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -377,7 +377,7 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                       unsigned Op0, bool Op0IsKill,
                                       const ConstantFP *FPImm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -400,7 +400,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                        unsigned Op1, bool Op1IsKill,
                                        uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -423,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -442,7 +442,7 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       uint64_t Imm1, uint64_t Imm2) {
   unsigned ResultReg = createResultReg(RC);
-  const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+  const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1549,7 +1549,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
   NumBytes = CCInfo.getNextStackOffset();
 
   // Issue CALLSEQ_START
-  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(AdjStackDown))
                   .addImm(NumBytes));
@@ -1647,7 +1647,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                              const Instruction *I, CallingConv::ID CC,
                              unsigned &NumBytes) {
   // Issue CALLSEQ_END
-  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(AdjStackUp))
                   .addImm(NumBytes).addImm(0));
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index e2e95d47b37b..381b404519e2 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -268,14 +268,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
       // bic r4, r4, MaxAlign
       // mov sp, r4
       // FIXME: It will be better just to find spare register here.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
-        .addReg(ARM::SP, RegState::Kill);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+        .addReg(ARM::SP, RegState::Kill));
       AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
                                           TII.get(ARM::t2BICri), ARM::R4)
                                   .addReg(ARM::R4, RegState::Kill)
                                   .addImm(MaxAlign-1)));
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
-        .addReg(ARM::R4, RegState::Kill);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+        .addReg(ARM::R4, RegState::Kill));
     }
 
     AFI->setShouldRestoreSPFromFP(true);
@@ -293,9 +293,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
         .addReg(ARM::SP)
         .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
     else
-      BuildMI(MBB, MBBI, dl,
-              TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister())
-        .addReg(ARM::SP);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                             RegInfo->getBaseRegister())
+        .addReg(ARM::SP));
   }
 
   // If the frame has variable sized objects then the epilogue must restore
@@ -364,8 +364,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                  "No scratch register to restore SP from FP!");
           emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                  ARMCC::AL, 0, TII);
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
-            .addReg(ARM::R4);
+          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                                 ARM::SP)
+            .addReg(ARM::R4));
         }
       } else {
         // Thumb2 or ARM.
@@ -373,8 +374,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
           BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
             .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
         else
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
-            .addReg(FramePtr);
+          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                                 ARM::SP)
+            .addReg(FramePtr));
       }
     } else if (NumBytes)
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -427,6 +429,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
 
     // Delete the pseudo instruction TCRETURN.
     MBB.erase(MBBI);
+    MBBI = NewMI;
   }
 
   if (VARegSaveSize)
@@ -736,20 +739,52 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
 /// estimateStackSize - Estimate and return the size of the frame.
 /// FIXME: Make generic?
 static unsigned estimateStackSize(MachineFunction &MF) {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  unsigned MaxAlign = MFI->getMaxAlignment();
   int Offset = 0;
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -FFI->getObjectOffset(i);
+
+  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+  // It really should be refactored to share code. Until then, changes
+  // should keep in mind that there's tight coupling between the two.
+
+  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -MFI->getObjectOffset(i);
     if (FixedOff > Offset) Offset = FixedOff;
   }
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
       continue;
-    Offset += FFI->getObjectSize(i);
-    unsigned Align = FFI->getObjectAlignment(i);
+    Offset += MFI->getObjectSize(i);
+    unsigned Align = MFI->getObjectAlignment(i);
     // Adjust to alignment boundary
     Offset = (Offset+Align-1)/Align*Align;
+
+    MaxAlign = std::max(Align, MaxAlign);
   }
+
+  if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+    Offset += MFI->getMaxCallFrameSize();
+
+  // Round up the size to a multiple of the alignment.  If the function has
+  // any calls or alloca's, align to the target's StackAlignment value to
+  // ensure that the callee's frame or the alloca data is suitably aligned;
+  // otherwise, for leaf functions, align to the TransientStackAlignment
+  // value.
+  unsigned StackAlign;
+  if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+      (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+    StackAlign = TFI->getStackAlignment();
+  else
+    StackAlign = TFI->getTransientStackAlignment();
+
+  // If the frame pointer is eliminated, all frame offsets will be relative to
+  // SP not FP. Align to MaxAlign so this works.
+  StackAlign = std::max(StackAlign, MaxAlign);
+  unsigned AlignMask = StackAlign - 1;
+  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
   return (unsigned)Offset;
 }
 
@@ -841,9 +876,14 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     if (AFI->getVarArgsRegSaveSize() > 0)
       MF.getRegInfo().setPhysRegUsed(ARM::LR);
 
-    // Spill R4 if Thumb1 epilogue has to restore SP from FP since 
+    // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+    // for sure what the stack size will be, but for this, an estimate is good
+    // enough. If anything changes it, it'll be a spill, which implies
+    // we've used all the registers and so R4 is already used, so not marking
+    // it here will be OK.
     // FIXME: It will be better just to find spare register here.
-    if (MFI->hasVarSizedObjects())
+    unsigned StackSize = estimateStackSize(MF);
+    if (MFI->hasVarSizedObjects() || StackSize > 508)
       MF.getRegInfo().setPhysRegUsed(ARM::R4);
   }
 
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
index 3f0238387a2b..8d77b2d8383e 100644
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -128,10 +128,10 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
   for (size_t i = 0, e = Globals.size(); i != e; ) {
     size_t j = 0;
     uint64_t MergedSize = 0;
-    std::vector<const Type*> Tys;
+    std::vector<Type*> Tys;
     std::vector<Constant*> Inits;
     for (j = i; j != e; ++j) {
-      const Type *Ty = Globals[j]->getType()->getElementType();
+      Type *Ty = Globals[j]->getType()->getElementType();
       MergedSize += TD->getTypeAllocSize(Ty);
       if (MergedSize > MaxOffset) {
         break;
@@ -175,7 +175,9 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
       continue;
 
     // Ignore fancy-aligned globals for now.
-    if (I->getAlignment() != 0)
+    unsigned Alignment = I->getAlignment();
+    const Type *Ty = I->getType()->getElementType();
+    if (Alignment > TD->getABITypeAlignment(Ty))
       continue;
 
     // Ignore all 'special' globals.
@@ -183,7 +185,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
         I->getName().startswith(".llvm."))
       continue;
 
-    if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+    if (TD->getTypeAllocSize(Ty) < MaxOffset) {
       const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
       if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
         BSSGlobals.push_back(I);
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 517bba8cee8e..787f6a279187 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -19,11 +19,11 @@ using namespace llvm;
 static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
                          const TargetRegisterInfo &TRI) {
   // FIXME: Detect integer instructions properly.
-  const TargetInstrDesc &TID = MI->getDesc();
-  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (TID.mayStore())
+  const MCInstrDesc &MCID = MI->getDesc();
+  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+  if (MCID.mayStore())
     return false;
-  unsigned Opcode = TID.getOpcode();
+  unsigned Opcode = MCID.getOpcode();
   if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
     return false;
   if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
@@ -43,15 +43,15 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
 
     // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
     // a VMLA / VMLS will cause 4 cycle stall.
-    const TargetInstrDesc &TID = MI->getDesc();
-    if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
       MachineInstr *DefMI = LastMI;
-      const TargetInstrDesc &LastTID = LastMI->getDesc();
+      const MCInstrDesc &LastMCID = LastMI->getDesc();
       // Skip over one non-VFP / NEON instruction.
-      if (!LastTID.isBarrier() &&
+      if (!LastMCID.isBarrier() &&
           // On A9, AGU and NEON/FPU are muxed.
-          !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) &&
-          (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+          !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+          (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
         MachineBasicBlock::iterator I = LastMI;
         if (I != LastMI->getParent()->begin()) {
           I = llvm::prior(I);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad516dafb91..2c9481b86c55 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -329,10 +329,10 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
   if (Use->getOpcode() == ISD::CopyToReg)
     return true;
   if (Use->isMachineOpcode()) {
-    const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
-    if (TID.mayStore())
+    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+    if (MCID.mayStore())
       return true;
-    unsigned Opcode = TID.getOpcode();
+    unsigned Opcode = MCID.getOpcode();
     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
       return true;
     // vmlx feeding into another vmlx. We actually want to unfold
@@ -1354,30 +1354,34 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
 ///
 SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue RegClass =
+    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
 }
 
 /// PairDRegs - Form a quad register from a pair of D registers.
 ///
 SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
 }
 
 /// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
 ///
 SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
 }
 
 /// QuadSRegs - Form 4 consecutive S registers.
@@ -1385,12 +1389,15 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
 SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
                                    SDValue V2, SDValue V3) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue RegClass =
+    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+                                    V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
 }
 
 /// QuadDRegs - Form 4 consecutive D registers.
@@ -1398,12 +1405,14 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
 SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
                                    SDValue V2, SDValue V3) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+                                    V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
 }
 
 /// QuadQRegs - Form 4 consecutive Q registers.
@@ -1411,12 +1420,14 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
 SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
                                    SDValue V2, SDValue V3) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
-  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+                                    V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
 }
 
 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 339c85886aea..cf8c5baa8e7d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -506,6 +506,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
     setTargetDAGCombine(ISD::STORE);
+    setTargetDAGCombine(ISD::FP_TO_SINT);
+    setTargetDAGCombine(ISD::FP_TO_UINT);
+    setTargetDAGCombine(ISD::FDIV);
   }
 
   computeRegisterProperties();
@@ -538,7 +541,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
   }
-  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops())
+  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
     setOperationAction(ISD::MULHS, MVT::i32, Expand);
 
   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -704,6 +708,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 
+  setOperationAction(ISD::FMA, MVT::f64, Expand);
+  setOperationAction(ISD::FMA, MVT::f32, Expand);
+
   // Various VFP goodness
   if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
@@ -974,12 +981,12 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   // Load are scheduled for latency even if there instruction itinerary
   // is not available.
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
 
-  if (TID.getNumDefs() == 0)
+  if (MCID.getNumDefs() == 0)
     return Sched::RegPressure;
   if (!Itins->isEmpty() &&
-      Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
+      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
     return Sched::Latency;
 
   return Sched::RegPressure;
@@ -1633,7 +1640,11 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     return false;
 
   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
-  // emitEpilogue is not ready for them.
+  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+  // support in the assembler and linker to be used. This would need to be
+  // fixed to fully support tail calls in Thumb1.
+  //
   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
   // LR.  This means if we need to reload LR, it takes an extra instructions,
   // which outweighs the value of the tail call; but here we don't know yet
@@ -2281,12 +2292,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
     // ARMv7 with MP extension has PLDW.
     return Op.getOperand(0);
 
-  if (Subtarget->isThumb())
+  unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+  if (Subtarget->isThumb()) {
     // Invert the bits.
     isRead = ~isRead & 1;
-  unsigned isData = Subtarget->isThumb() ? 0 : 1;
+    isData = ~isData & 1;
+  }
 
-  // Currently there is no intrinsic that matches pli.
   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
                      DAG.getConstant(isData, MVT::i32));
@@ -2742,7 +2754,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
@@ -5522,12 +5534,108 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
   return SDValue();
 }
 
+// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
+
+  // Only perform optimization if after legalize, and if NEON is available. We
+  // also expect both operands to be BUILD_VECTORs.
+  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+      || N0.getOpcode() != ISD::BUILD_VECTOR
+      || N1.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+  EVT VT = N->getValueType(0);
+  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+    return SDValue();
+
+  // Check that the vector operands are of the right form.
+  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
+  // operands, where N is the size of the formed vector.
+  // Each EXTRACT_VECTOR should have the same input vector and odd or even
+  // index such that we have a pair wise add pattern.
+
+  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+  SDValue Vec = N0->getOperand(0)->getOperand(0);
+  SDNode *V = Vec.getNode();
+  unsigned nextIndex = 0;
+
+  // For each operand of the ADD (both are BUILD_VECTORs), check that each
+  // of its operands is an EXTRACT_VECTOR with the same vector and the
+  // appropriate index.
+  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+      SDValue ExtVec0 = N0->getOperand(i);
+      SDValue ExtVec1 = N1->getOperand(i);
+
+      // First operand is the vector, verify it's the same.
+      if (V != ExtVec0->getOperand(0).getNode() ||
+          V != ExtVec1->getOperand(0).getNode())
+        return SDValue();
+
+      // Second is the constant, verify it's correct.
+      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+      // For the constant, we want to see all the even or all the odd.
+      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+          || C1->getZExtValue() != nextIndex+1)
+        return SDValue();
+
+      // Increment index.
+      nextIndex+=2;
+    } else
+      return SDValue();
+  }
+
+  // Create VPADDL node.
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Build operand list.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+                                TLI.getPointerTy()));
+
+  // Input is the vector.
+  Ops.push_back(Vec);
+
+  // Get widened type and narrowed type.
+  MVT widenType;
+  unsigned numElem = VT.getVectorNumElements();
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+    default:
+      assert(0 && "Invalid vector element type for padd optimization.");
+  }
+
+  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+                            widenType, &Ops[0], Ops.size());
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
 /// called with the default operands, and if that fails, with commuted
 /// operands.
 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          const ARMSubtarget *Subtarget){
+
+  // Attempt to create vpaddl for this add.
+  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+  if (Result.getNode())
+    return Result;
+
   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
@@ -5539,17 +5647,18 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
 ///
 static SDValue PerformADDCombine(SDNode *N,
-                                 TargetLowering::DAGCombinerInfo &DCI) {
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
   // First try with the default operand order.
-  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
   if (Result.getNode())
     return Result;
 
   // If that didn't work, try again with the operands commuted.
-  return PerformADDCombineWithOperands(N, N1, N0, DCI);
+  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
 }
 
 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
@@ -5588,7 +5697,7 @@ static SDValue PerformVMULCombine(SDNode *N,
   unsigned Opcode = N0.getOpcode();
   if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
       Opcode != ISD::FADD && Opcode != ISD::FSUB) {
-    Opcode = N0.getOpcode();
+    Opcode = N1.getOpcode();
     if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
         Opcode != ISD::FADD && Opcode != ISD::FSUB)
       return SDValue();
@@ -5874,8 +5983,8 @@ static SDValue PerformORCombine(SDNode *N,
   return SDValue();
 }
 
-/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
-/// C1 & C2 == C1.
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
 static SDValue PerformBFICombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
   SDValue N1 = N->getOperand(1);
@@ -5883,9 +5992,12 @@ static SDValue PerformBFICombine(SDNode *N,
     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
     if (!N11C)
       return SDValue();
-    unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+    unsigned LSB = CountTrailingZeros_32(~InvMask);
+    unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+    unsigned Mask = (1 << Width)-1;
     unsigned Mask2 = N11C->getZExtValue();
-    if ((Mask & Mask2) == Mask2)
+    if ((Mask & (~Mask2)) == 0)
       return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
                              N->getOperand(0), N1.getOperand(0),
                              N->getOperand(2));
@@ -6378,7 +6490,105 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
   return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
 }
 
-/// getVShiftImm - Check if this is a valid build_vector for the immediate
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+  integerPart cN;
+  integerPart c0 = 0;
+  for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+       I != E; I++) {
+    ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+    if (!C)
+      return false;
+
+    bool isExact;
+    APFloat APF = C->getValueAPF();
+    if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+        != APFloat::opOK || !isExact)
+      return false;
+
+    c0 = (I == 0) ? cN : c0;
+    if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+      return false;
+  }
+  C = c0;
+  return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+///  vmul.f32        d16, d17, d16
+///  vcvt.s32.f32    d16, d16
+/// becomes:
+///  vcvt.s32.f32    d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op = N->getOperand(0);
+
+  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+      Op.getOpcode() != ISD::FMUL)
+    return SDValue();
+
+  uint64_t C;
+  SDValue N0 = Op->getOperand(0);
+  SDValue ConstVec = Op->getOperand(1);
+  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+      !isConstVecPow2(ConstVec, isSigned, C))
+    return SDValue();
+
+  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+    Intrinsic::arm_neon_vcvtfp2fxu;
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+                     N->getValueType(0),
+                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+                     DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+///  vcvt.f32.s32    d16, d16
+///  vdiv.f32        d16, d17, d16
+/// becomes:
+///  vcvt.f32.s32    d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op = N->getOperand(0);
+  unsigned OpOpcode = Op.getNode()->getOpcode();
+
+  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+    return SDValue();
+
+  uint64_t C;
+  SDValue ConstVec = N->getOperand(1);
+  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+      !isConstVecPow2(ConstVec, isSigned, C))
+    return SDValue();
+
+  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+    Intrinsic::arm_neon_vcvtfxu2fp;
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+                     Op.getValueType(),
+                     DAG.getConstant(IntrinsicOpcode, MVT::i32),
+                     Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
 /// operand of a vector shift operation, where all the elements of the
 /// build_vector must have the same constant integer value.
 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
@@ -6750,11 +6960,75 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
 }
 
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+  SDValue Cmp = N->getOperand(4);
+  if (Cmp.getOpcode() != ARMISD::CMPZ)
+    // Only CMOVs fed by a CMPZ (the EQ and NE cases) are handled.
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = Cmp.getOperand(0);
+  SDValue RHS = Cmp.getOperand(1);
+  SDValue FalseVal = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue ARMcc = N->getOperand(2);
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+  // Simplify
+  //   mov     r1, r0
+  //   cmp     r1, x
+  //   mov     r0, y
+  //   moveq   r0, x
+  // to
+  //   cmp     r0, x
+  //   movne   r0, y
+  //
+  //   mov     r1, r0
+  //   cmp     r1, x
+  //   mov     r0, x
+  //   movne   r0, y
+  // to
+  //   cmp     r0, x
+  //   movne   r0, y
+  // FIXME: Turn this into a target neutral optimization?
+  SDValue Res; // Stays empty unless one of the two patterns matches.
+  if (CC == ARMCC::NE && FalseVal == RHS) { // Second example above.
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+                      N->getOperand(3), Cmp); // LHS replaces FalseVal.
+  } else if (CC == ARMCC::EQ && TrueVal == RHS) { // First example above.
+    SDValue ARMcc; // Shadows the outer ARMcc; handed to getARMCmp below.
+    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+                      N->getOperand(3), NewCmp); // Condition rebuilt as NE.
+  }
+
+  if (Res.getNode()) { // Only when a replacement CMOV was built.
+    APInt KnownZero, KnownOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+    DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+    // Known bits are queried on the original CMOV; re-attach them to Res.
+    if (KnownZero == 0xfffffffe) // Only bit 0 may be set.
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i1));
+    else if (KnownZero == 0xffffff00) // Only the low 8 bits may be set.
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i8));
+    else if (KnownZero == 0xffff0000) // Only the low 16 bits may be set.
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i16));
+  }
+
+  return Res; // Empty when no pattern matched.
+}
+
 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
   default: break;
-  case ISD::ADD:        return PerformADDCombine(N, DCI);
+  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
@@ -6767,6 +7041,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+  case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   case ISD::SHL:
   case ISD::SRA:
@@ -6775,6 +7052,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
   case ARMISD::VLD2DUP:
   case ARMISD::VLD3DUP:
   case ARMISD::VLD4DUP:
@@ -7277,10 +7555,17 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
     default:  break;
     case 'l': return C_RegisterClass;
     case 'w': return C_RegisterClass;
+    case 'h': return C_RegisterClass;
+    case 'x': return C_RegisterClass;
+    case 't': return C_RegisterClass;
+    case 'j': return C_Other; // Constant for movw.
+    }
+  } else if (Constraint.size() == 2) {
+    switch (Constraint[0]) {
+    default: break;
+    // All 'U+' constraints are addresses.
+    case 'U': return C_Memory;
     }
-  } else {
-    if (Constraint == "Uv")
-      return C_Memory;
   }
   return TargetLowering::getConstraintType(Constraint);
 }
@@ -7319,26 +7604,43 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
   return weight;
 }
 
-std::pair<unsigned, const TargetRegisterClass*>
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const {
   if (Constraint.size() == 1) {
     // GCC ARM Constraint Letters
     switch (Constraint[0]) {
-    case 'l':
+    case 'l': // Low regs or general regs.
       if (Subtarget->isThumb())
-        return std::make_pair(0U, ARM::tGPRRegisterClass);
+        return RCPair(0U, ARM::tGPRRegisterClass);
       else
-        return std::make_pair(0U, ARM::GPRRegisterClass);
+        return RCPair(0U, ARM::GPRRegisterClass);
+    case 'h': // High regs or no regs.
+      if (Subtarget->isThumb())
+	return RCPair(0U, ARM::hGPRRegisterClass);
+      break;
     case 'r':
-      return std::make_pair(0U, ARM::GPRRegisterClass);
+      return RCPair(0U, ARM::GPRRegisterClass);
     case 'w':
       if (VT == MVT::f32)
-        return std::make_pair(0U, ARM::SPRRegisterClass);
+        return RCPair(0U, ARM::SPRRegisterClass);
       if (VT.getSizeInBits() == 64)
-        return std::make_pair(0U, ARM::DPRRegisterClass);
+        return RCPair(0U, ARM::DPRRegisterClass);
       if (VT.getSizeInBits() == 128)
-        return std::make_pair(0U, ARM::QPRRegisterClass);
+        return RCPair(0U, ARM::QPRRegisterClass);
+      break;
+    case 'x':
+      if (VT == MVT::f32)
+	return RCPair(0U, ARM::SPR_8RegisterClass);
+      if (VT.getSizeInBits() == 64)
+	return RCPair(0U, ARM::DPR_8RegisterClass);
+      if (VT.getSizeInBits() == 128)
+	return RCPair(0U, ARM::QPR_8RegisterClass);
+      break;
+    case 't':
+      if (VT == MVT::f32)
+	return RCPair(0U, ARM::SPRRegisterClass);
       break;
     }
   }
@@ -7348,47 +7650,6 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-std::vector<unsigned> ARMTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const {
-  if (Constraint.size() != 1)
-    return std::vector<unsigned>();
-
-  switch (Constraint[0]) {      // GCC ARM Constraint Letters
-  default: break;
-  case 'l':
-    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
-                                 0);
-  case 'r':
-    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
-                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
-                                 ARM::R12, ARM::LR, 0);
-  case 'w':
-    if (VT == MVT::f32)
-      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
-                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
-                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
-                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
-                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
-                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
-                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
-                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
-    if (VT.getSizeInBits() == 64)
-      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
-                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
-                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
-                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
-    if (VT.getSizeInBits() == 128)
-      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
-                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
-      break;
-  }
-
-  return std::vector<unsigned>();
-}
-
 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
 /// vector.  If it is invalid, don't add anything to Ops.
 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -7403,6 +7664,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   char ConstraintLetter = Constraint[0];
   switch (ConstraintLetter) {
   default: break;
+  case 'j':
   case 'I': case 'J': case 'K': case 'L':
   case 'M': case 'N': case 'O':
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
@@ -7417,6 +7679,13 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       return;
 
     switch (ConstraintLetter) {
+      case 'j':
+	// Constant suitable for movw, must be between 0 and
+	// 65535.
+	if (Subtarget->hasV6T2Ops())
+	  if (CVal >= 0 && CVal <= 65535)
+	    break;
+	return;
       case 'I':
         if (Subtarget->isThumb1Only()) {
           // This must be a constant between 0 and 255, for ADD
@@ -7685,7 +7954,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(2);
     Info.offset = 0;
     Info.align = 8;
-    Info.vol = false;
+    Info.vol = true;
     Info.readMem = false;
     Info.writeMem = true;
     return true;
@@ -7696,7 +7965,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Info.align = 8;
-    Info.vol = false;
+    Info.vol = true;
     Info.readMem = true;
     Info.writeMem = false;
     return true;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 21a9a3aa746d..980fb404887e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -244,6 +244,7 @@ namespace llvm {
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
 
+    SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
@@ -306,9 +307,6 @@ namespace llvm {
     std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                      EVT VT) const;
 
     /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
     /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 897d8a5d79e3..3ccf22f80b7d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -107,16 +107,6 @@ def AddrModeT2_pc   : AddrMode<14>;
 def AddrModeT2_i8s4 : AddrMode<15>;
 def AddrMode_i12    : AddrMode<16>;
 
-// Instruction size.
-class SizeFlagVal<bits<3> val> {
-  bits<3> Value = val;
-}
-def SizeInvalid  : SizeFlagVal<0>;  // Unset.
-def SizeSpecial  : SizeFlagVal<1>;  // Pseudo or special.
-def Size8Bytes   : SizeFlagVal<2>;
-def Size4Bytes   : SizeFlagVal<3>;
-def Size2Bytes   : SizeFlagVal<4>;
-
 // Load / store index mode.
 class IndexMode<bits<2> val> {
   bits<2> Value = val;
@@ -236,13 +226,13 @@ def shr_imm64 : Operand<i32> {
 // ARM Instruction templates.
 //
 
-class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
+class InstTemplate<AddrMode am, int sz, IndexMode im,
                    Format f, Domain d, string cstr, InstrItinClass itin>
   : Instruction {
   let Namespace = "ARM";
 
   AddrMode AM = am;
-  SizeFlagVal SZ = sz;
+  int Size = sz;
   IndexMode IM = im;
   bits<2> IndexModeBits = IM.Value;
   Format F = f;
@@ -256,12 +246,11 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
 
   // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
   let TSFlags{4-0}   = AM.Value;
-  let TSFlags{7-5}   = SZ.Value;
-  let TSFlags{9-8}   = IndexModeBits;
-  let TSFlags{15-10} = Form;
-  let TSFlags{16}    = isUnaryDataProc;
-  let TSFlags{17}    = canXformTo16Bit;
-  let TSFlags{20-18} = D.Value;
+  let TSFlags{6-5}   = IndexModeBits;
+  let TSFlags{12-7} = Form;
+  let TSFlags{13}    = isUnaryDataProc;
+  let TSFlags{14}    = canXformTo16Bit;
+  let TSFlags{17-15} = D.Value;
 
   let Constraints = cstr;
   let Itinerary = itin;
@@ -271,53 +260,70 @@ class Encoding {
   field bits<32> Inst;
 }
 
-class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
+class InstARM<AddrMode am, int sz, IndexMode im,
               Format f, Domain d, string cstr, InstrItinClass itin>
   : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
 
 // This Encoding-less class is used by Thumb1 to specify the encoding bits later
 // on by adding flavors to specific instructions.
-class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
+class InstThumb<AddrMode am, int sz, IndexMode im,
                 Format f, Domain d, string cstr, InstrItinClass itin>
   : InstTemplate<am, sz, im, f, d, cstr, itin>;
 
 class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
-  // FIXME: This really should derive from InstTemplate instead, as pseudos
-  //        don't need encoding information. TableGen doesn't like that
-  //        currently. Need to figure out why and fix it.
-  : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
-            "", itin> {
+  : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+                 GenericDomain, "", itin> {
   let OutOperandList = oops;
   let InOperandList = iops;
   let Pattern = pattern;
   let isCodeGenOnly = 1;
+  let isPseudo = 1;
 }
 
 // PseudoInst that's ARM-mode only.
-class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
                     list<dag> pattern>
   : PseudoInst<oops, iops, itin, pattern> {
-  let SZ = sz;
+  let Size = sz;
   list<Predicate> Predicates = [IsARM];
 }
 
 // PseudoInst that's Thumb-mode only.
-class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
                     list<dag> pattern>
   : PseudoInst<oops, iops, itin, pattern> {
-  let SZ = sz;
+  let Size = sz;
   list<Predicate> Predicates = [IsThumb];
 }
 
 // PseudoInst that's Thumb2-mode only.
-class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
                     list<dag> pattern>
   : PseudoInst<oops, iops, itin, pattern> {
-  let SZ = sz;
+  let Size = sz;
   list<Predicate> Predicates = [IsThumb2];
 }
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+                      InstrItinClass itin, list<dag> pattern,
+                      dag Result> // Result is passed to PseudoInstExpansion.
+  : ARMPseudoInst<oops, iops, sz, itin, pattern>, // ARM-mode pseudo.
+    PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+                    InstrItinClass itin, list<dag> pattern,
+                    dag Result> // Result is passed to PseudoInstExpansion.
+  : tPseudoInst<oops, iops, sz, itin, pattern>, // Thumb-mode pseudo.
+    PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+                    InstrItinClass itin, list<dag> pattern,
+                    dag Result> // Result is passed to PseudoInstExpansion.
+  : t2PseudoInst<oops, iops, sz, itin, pattern>, // Thumb2-mode pseudo.
+    PseudoInstExpansion<Result>;
+
 // Almost all ARM instructions are predicable.
-class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class I<dag oops, dag iops, AddrMode am, int sz,
         IndexMode im, Format f, InstrItinClass itin,
         string opc, string asm, string cstr,
         list<dag> pattern>
@@ -332,7 +338,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 }
 
 // A few are not predicable
-class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class InoP<dag oops, dag iops, AddrMode am, int sz,
            IndexMode im, Format f, InstrItinClass itin,
            string opc, string asm, string cstr,
            list<dag> pattern>
@@ -348,7 +354,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 // Same as I except it can optionally modify CPSR. Note it's modeled as an input
 // operand since by default it's a zero register. It will become an implicit def
 // once it's "flipped".
-class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class sI<dag oops, dag iops, AddrMode am, int sz,
          IndexMode im, Format f, InstrItinClass itin,
          string opc, string asm, string cstr,
          list<dag> pattern>
@@ -366,7 +372,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 }
 
 // Special cases
-class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class XI<dag oops, dag iops, AddrMode am, int sz,
          IndexMode im, Format f, InstrItinClass itin,
          string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
@@ -379,31 +385,31 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 class AI<dag oops, dag iops, Format f, InstrItinClass itin,
          string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
       opc, asm, "", pattern>;
 class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
-  : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+  : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
        opc, asm, "", pattern>;
 class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
           string asm, list<dag> pattern>
-  : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+  : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
        asm, "", pattern>;
 class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+  : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
          opc, asm, "", pattern>;
 
 // Ctrl flow instructions
 class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
       opc, asm, "", pattern> {
   let Inst{27-24} = opcod;
 }
 class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
            string asm, list<dag> pattern>
-  : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+  : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
        asm, "", pattern> {
   let Inst{27-24} = opcod;
 }
@@ -411,13 +417,13 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
 // BR_JT instructions
 class JTI<dag oops, dag iops, InstrItinClass itin,
           string asm, list<dag> pattern>
-  : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
+  : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
        asm, "", pattern>;
 
 // Atomic load/store instructions
 class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
       opc, asm, "", pattern> {
   bits<4> Rt;
   bits<4> Rn;
@@ -430,7 +436,7 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
 }
 class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
       opc, asm, "", pattern> {
   bits<4> Rd;
   bits<4> Rt;
@@ -460,21 +466,21 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
 // addrmode1 instructions
 class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+  : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
       opc, asm, "", pattern> {
   let Inst{24-21} = opcod;
   let Inst{27-26} = 0b00;
 }
 class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+  : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
        opc, asm, "", pattern> {
   let Inst{24-21} = opcod;
   let Inst{27-26} = 0b00;
 }
 class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
            string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+  : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
        asm, "", pattern> {
   let Inst{24-21} = opcod;
   let Inst{27-26} = 0b00;
@@ -486,7 +492,7 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
 class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
              Format f, InstrItinClass itin, string opc, string asm,
              list<dag> pattern>
-  : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+  : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
       "", pattern> {
   let Inst{27-25} = op;
   let Inst{24} = 1;  // 24 == P
@@ -499,7 +505,7 @@ class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
 class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
                 IndexMode im, Format f, InstrItinClass itin, string opc,
                 string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin,
+  : I<oops, iops, AddrMode2, 4, im, f, itin,
       opc, asm, cstr, pattern> {
   bits<4> Rt;
   let Inst{27-26} = 0b01;
@@ -547,7 +553,7 @@ class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
 // addrmode3 instructions
 class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
             InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+  : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
       opc, asm, "", pattern> {
   bits<14> addr;
   bits<4> Rt;
@@ -567,7 +573,7 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
 class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
                 IndexMode im, Format f, InstrItinClass itin, string opc,
                 string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+  : I<oops, iops, AddrMode3, 4, im, f, itin,
       opc, asm, cstr, pattern> {
   bits<4> Rt;
   let Inst{27-25} = 0b000;
@@ -583,7 +589,7 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
 class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
                   IndexMode im, Format f, InstrItinClass itin, string opc,
                   string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+  : I<oops, iops, AddrMode3, 4, im, f, itin,
       opc, asm, cstr, pattern> {
   // {13}     1 == imm8, 0 == Rm
   // {12-9}   Rn
@@ -627,7 +633,7 @@ class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
 // stores
 class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+  : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
       opc, asm, "", pattern> {
   bits<14> addr;
   bits<4> Rt;
@@ -647,7 +653,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
 // Pre-indexed stores
 class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+  : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
       opc, asm, cstr, pattern> {
   let Inst{4}     = 1;
   let Inst{5}     = 1; // H bit
@@ -660,7 +666,7 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
 }
 class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+  : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
       opc, asm, cstr, pattern> {
   let Inst{4}     = 1;
   let Inst{5}     = 1; // H bit
@@ -675,7 +681,7 @@ class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
 // Post-indexed stores
 class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+  : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
       opc, asm, cstr,pattern> {
   // {13}     1 == imm8, 0 == Rm
   // {12-9}   Rn
@@ -701,7 +707,7 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
 }
 class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string asm, string cstr, list<dag> pattern>
-  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+  : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
       opc, asm, cstr, pattern> {
   let Inst{4}     = 1;
   let Inst{5}     = 1; // H bit
@@ -716,7 +722,7 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
 // addrmode4 instructions
 class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
            string asm, string cstr, list<dag> pattern>
-  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> {
+  : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
   bits<4>  p;
   bits<16> regs;
   bits<4>  Rn;
@@ -730,7 +736,7 @@ class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
 // Unsigned multiply, multiply-accumulate instructions.
 class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
              string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
   let Inst{7-4}   = 0b1001;
   let Inst{20}    = 0; // S bit
@@ -738,7 +744,7 @@ class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
 }
 class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
-  : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+  : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
        opc, asm, "", pattern> {
   let Inst{7-4}   = 0b1001;
   let Inst{27-21} = opcod;
@@ -747,7 +753,7 @@ class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
 // Most significant word multiply
 class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
              InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
   bits<4> Rd;
   bits<4> Rn;
@@ -770,7 +776,7 @@ class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
 // SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
 class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
               InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
   bits<4> Rn;
   bits<4> Rm;
@@ -809,7 +815,7 @@ class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
 // Extend instructions.
 class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
       opc, asm, "", pattern> {
   // All AExtI instructions have Rd and Rm register operands.
   bits<4> Rd;
@@ -824,7 +830,7 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
 // Misc Arithmetic instructions.
 class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
                InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
       opc, asm, "", pattern> {
   bits<4> Rd;
   bits<4> Rm;
@@ -839,7 +845,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
 // PKH instructions
 class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+  : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
       opc, asm, "", pattern> {
   bits<4> Rd;
   bits<4> Rn;
@@ -874,7 +880,7 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
 // Thumb Instruction Format Definitions.
 //
 
-class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
              InstrItinClass itin, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
@@ -886,39 +892,32 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 // TI - Thumb instruction.
 class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
-  : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+  : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
 
 // Two-address instructions
 class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
           list<dag> pattern>
-  : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst",
+  : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
            pattern>;
 
 // tBL, tBX 32-bit instructions
 class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
            dag oops, dag iops, InstrItinClass itin, string asm,
            list<dag> pattern>
-    : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>,
+    : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
       Encoding {
   let Inst{31-27} = opcod1;
   let Inst{15-14} = opcod2;
   let Inst{12}    = opcod3;
 }
 
-// Move to/from coprocessor instructions
-class T1Cop<dag oops, dag iops, string asm, list<dag> pattern>
-  : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>,
-    Encoding, Requires<[IsThumb, HasV6]> {
-  let Inst{31-28} = 0b1110;
-}
-
 // BR_JT instructions
 class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
            list<dag> pattern>
-  : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+  : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
 
 // Thumb1 only
-class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
               InstrItinClass itin, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
@@ -930,19 +929,19 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 class T1I<dag oops, dag iops, InstrItinClass itin,
           string asm, list<dag> pattern>
-  : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+  : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
 class T1Ix2<dag oops, dag iops, InstrItinClass itin,
             string asm, list<dag> pattern>
-  : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+  : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
 
 // Two-address instructions
 class T1It<dag oops, dag iops, InstrItinClass itin,
            string asm, string cstr, list<dag> pattern>
-  : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+  : Thumb1I<oops, iops, AddrModeNone, 2, itin,
             asm, cstr, pattern>;
 
 // Thumb1 instruction that can either be predicated or set CPSR.
-class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -955,16 +954,16 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 class T1sI<dag oops, dag iops, InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+  : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
 
 // Two-address instructions
 class T1sIt<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+  : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
              "$Rn = $Rdn", pattern>;
 
 // Thumb1 instruction that can be predicated.
-class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -977,17 +976,17 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 class T1pI<dag oops, dag iops, InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+  : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
 
 // Two-address instructions
 class T1pIt<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+  : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
              "$Rn = $Rdn", pattern>;
 
 class T1pIs<dag oops, dag iops,
             InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
+  : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
 
 class Encoding16 : Encoding {
   let Inst{31-16} = 0x0000;
@@ -1036,7 +1035,7 @@ class T1BranchCond<bits<4> opcode> : Encoding16 {
 class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
                      InstrItinClass itin, string opc, string asm,
                      list<dag> pattern>
-  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+  : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
     T1LoadStore<0b0101, opcode> {
   bits<3> Rt;
   bits<8> addr;
@@ -1047,7 +1046,7 @@ class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
 class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
                         InstrItinClass itin, string opc, string asm,
                         list<dag> pattern>
-  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+  : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
     T1LoadStore<opA, {opB,?,?}> {
   bits<3> Rt;
   bits<8> addr;
@@ -1063,7 +1062,7 @@ class T1Misc<bits<7> opcode> : Encoding16 {
 }
 
 // Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
-class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
               InstrItinClass itin,
               string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1080,7 +1079,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 //
 // FIXME: This uses unified syntax so {s} comes before {p}. We should make it
 // more consistent.
-class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1095,7 +1094,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 }
 
 // Special cases
-class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
                InstrItinClass itin,
                string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1106,7 +1105,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   list<Predicate> Predicates = [IsThumb2];
 }
 
-class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
               InstrItinClass itin,
               string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1119,22 +1118,22 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 class T2I<dag oops, dag iops, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+  : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
 class T2Ii12<dag oops, dag iops, InstrItinClass itin,
              string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>;
+  : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
 class T2Ii8<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>;
+  : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
 class T2Iso<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, itin, opc, asm, "", pattern>;
+  : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
 class T2Ipc<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
+  : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
 class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "",
+  : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "",
             pattern> {
   bits<4> Rt;
   bits<4> Rt2;
@@ -1153,32 +1152,32 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
 
 class T2sI<dag oops, dag iops, InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+  : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
 
 class T2XI<dag oops, dag iops, InstrItinClass itin,
            string asm, list<dag> pattern>
-  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+  : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
 class T2JTI<dag oops, dag iops, InstrItinClass itin,
             string asm, list<dag> pattern>
-  : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+  : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
 
 // Move to/from coprocessor instructions
-class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
-  : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
-  let Inst{31-28} = 0b1111;
+class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
+  : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+  let Inst{31-28} = opc;
 }
 
 // Two-address instructions
 class T2XIt<dag oops, dag iops, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
-  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, cstr, pattern>;
+  : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
 
 // T2Iidxldst - Thumb2 indexed load / store instructions.
 class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
                  dag oops, dag iops,
                  AddrMode am, IndexMode im, InstrItinClass itin,
                  string opc, string asm, string cstr, list<dag> pattern>
-  : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
+  : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, "${p}", asm);
@@ -1232,7 +1231,7 @@ class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
 //
 
 // Almost all VFP instructions are predicable.
-class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
            IndexMode im, Format f, InstrItinClass itin,
            string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
@@ -1247,7 +1246,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 }
 
 // Special cases
-class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
             IndexMode im, Format f, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
@@ -1263,7 +1262,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 
 class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
-  : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+  : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
          opc, asm, "", pattern> {
   let PostEncoderMethod = "VFPThumb2PostEncoder";
 }
@@ -1272,7 +1271,7 @@ class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
 class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+  : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
   // Instruction operands.
   bits<5>  Dd;
@@ -1298,7 +1297,7 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
 class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+  : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
   // Instruction operands.
   bits<5>  Sd;
@@ -1324,7 +1323,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
 // VFP Load / store multiple pseudo instructions.
 class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
                      list<dag> pattern>
-  : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+  : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
             cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
@@ -1335,7 +1334,7 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
 // Load / store multiple
 class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
-  : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
+  : VFPXI<oops, iops, AddrMode4, 4, im,
           VFPLdStMulFrm, itin, asm, cstr, pattern> {
   // Instruction operands.
   bits<4>  Rn;
@@ -1355,7 +1354,7 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
 
 class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
-  : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
+  : VFPXI<oops, iops, AddrMode4, 4, im,
           VFPLdStMulFrm, itin, asm, cstr, pattern> {
   // Instruction operands.
   bits<4> Rn;
@@ -1569,7 +1568,7 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
 class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
             InstrItinClass itin, string opc, string dt, string asm, string cstr,
             list<dag> pattern>
-  : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
+  : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
@@ -1581,7 +1580,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
 class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
              InstrItinClass itin, string opc, string asm, string cstr,
              list<dag> pattern>
-  : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
+  : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
   let AsmString = !strconcat(opc, "${p}", "\t", asm);
@@ -1621,7 +1620,7 @@ class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
 }
 
 class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
-  : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+  : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
             itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
@@ -1630,7 +1629,7 @@ class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
 
 class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
                   list<dag> pattern>
-  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+  : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
             itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
@@ -1859,7 +1858,7 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
 class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
                dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string dt, string asm, list<dag> pattern>
-  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
+  : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
             "", itin> {
   let Inst{27-20} = opcod1;
   let Inst{11-8}  = opcod2;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 6f48d967f919..adcbf1806fe3 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -14,7 +14,6 @@
 #include "ARMInstrInfo.h"
 #include "ARM.h"
 #include "ARMAddressingModes.h"
-#include "ARMGenInstrInfo.inc"
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveVariables.h"
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9af76df7c37d..a42dd1a54ec7 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -62,6 +62,9 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+                                           SDTCisInt<1>]>;
+
 def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
@@ -130,7 +133,7 @@ def ARMMemBarrier     : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
                                [SDNPHasChain]>;
 def ARMMemBarrierMCR  : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                                [SDNPHasChain]>;
-def ARMPreload        : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+def ARMPreload        : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
                                [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
 
 def ARMrbit          : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
@@ -144,33 +147,48 @@ def ARMbfi           : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
 //
-def HasV4T           : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
+def HasV4T           : Predicate<"Subtarget->hasV4TOps()">,
+                                 AssemblerPredicate<"HasV4TOps">;
 def NoV4T            : Predicate<"!Subtarget->hasV4TOps()">;
 def HasV5T           : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
-def HasV6            : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">,
+                                 AssemblerPredicate<"HasV5TEOps">;
+def HasV6            : Predicate<"Subtarget->hasV6Ops()">,
+                                 AssemblerPredicate<"HasV6Ops">;
 def NoV6             : Predicate<"!Subtarget->hasV6Ops()">;
-def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
+def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
+                                 AssemblerPredicate<"HasV6T2Ops">;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7            : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
+def HasV7            : Predicate<"Subtarget->hasV7Ops()">,
+                                 AssemblerPredicate<"HasV7Ops">;
 def NoVFP            : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2          : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
-def HasVFP3          : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
-def HasNEON          : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
-def HasFP16          : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
-def HasDivide        : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasVFP2          : Predicate<"Subtarget->hasVFP2()">,
+                                 AssemblerPredicate<"FeatureVFP2">;
+def HasVFP3          : Predicate<"Subtarget->hasVFP3()">,
+                                 AssemblerPredicate<"FeatureVFP3">;
+def HasNEON          : Predicate<"Subtarget->hasNEON()">,
+                                 AssemblerPredicate<"FeatureNEON">;
+def HasFP16          : Predicate<"Subtarget->hasFP16()">,
+                                 AssemblerPredicate<"FeatureFP16">;
+def HasDivide        : Predicate<"Subtarget->hasDivide()">,
+                                 AssemblerPredicate<"FeatureHWDiv">;
 def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
-                                 AssemblerPredicate;
+                                 AssemblerPredicate<"FeatureT2XtPk">;
+def HasThumb2DSP     : Predicate<"Subtarget->hasThumb2DSP()">,
+                                 AssemblerPredicate<"FeatureDSPThumb2">;
 def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
-                                 AssemblerPredicate;
+                                 AssemblerPredicate<"FeatureDB">;
 def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
-                                 AssemblerPredicate;
+                                 AssemblerPredicate<"FeatureMP">;
 def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb          : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb          : Predicate<"Subtarget->isThumb()">,
+                                 AssemblerPredicate<"ModeThumb">;
 def IsThumb1Only     : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2         : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
-def IsARM            : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb2         : Predicate<"Subtarget->isThumb2()">,
+                                 AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsARM            : Predicate<"!Subtarget->isThumb()">,
+                                 AssemblerPredicate<"!ModeThumb">;
 def IsDarwin         : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin      : Predicate<"!Subtarget->isTargetDarwin()">;
 
@@ -237,11 +255,13 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
   return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
 }], hi16>;
 
-/// imm0_65535 predicate - True if the 32-bit immediate is in the range
-/// [0.65535].
-def imm0_65535 : ImmLeaf<i32, [{
+/// imm0_65535 - An immediate in the range [0,65535].
+def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
   return Imm >= 0 && Imm < 65536;
-}]>;
+}]> {
+  let ParserMatchClass = Imm0_65535AsmOperand;
+}
 
 class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
 class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
@@ -294,16 +314,19 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
 // FIXME: rename brtarget to t2_brtarget
 def brtarget : Operand<OtherVT> {
   let EncoderMethod = "getBranchTargetOpValue";
+  let OperandType = "OPERAND_PCREL";
 }
 
 // FIXME: get rid of this one?
 def uncondbrtarget : Operand<OtherVT> {
   let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+  let OperandType = "OPERAND_PCREL";
 }
 
 // Branch target for ARM. Handles conditional/unconditional
 def br_target : Operand<OtherVT> {
   let EncoderMethod = "getARMBranchTargetOpValue";
+  let OperandType = "OPERAND_PCREL";
 }
 
 // Call target.
@@ -311,6 +334,7 @@ def br_target : Operand<OtherVT> {
 def bltarget : Operand<i32> {
   // Encoded the same as branch targets.
   let EncoderMethod = "getBranchTargetOpValue";
+  let OperandType = "OPERAND_PCREL";
 }
 
 // Call target for ARM. Handles conditional/unconditional
@@ -318,6 +342,7 @@ def bltarget : Operand<i32> {
 def bl_target : Operand<i32> {
   // Encoded the same as branch targets.
   let EncoderMethod = "getARMBranchTargetOpValue";
+  let OperandType = "OPERAND_PCREL";
 }
 
 
@@ -394,14 +419,20 @@ def shift_imm : Operand<i32> {
   let ParserMatchClass = ShifterAsmOperand;
 }
 
+def ShiftedRegAsmOperand : AsmOperandClass {
+  let Name = "ShiftedReg";
+}
+
 // shifter_operand operands: so_reg and so_imm.
 def so_reg : Operand<i32>,    // reg reg imm
              ComplexPattern<i32, 3, "SelectShifterOperandReg",
                             [shl,srl,sra,rotr]> {
   let EncoderMethod = "getSORegOpValue";
   let PrintMethod = "printSORegOperand";
+  let ParserMatchClass = ShiftedRegAsmOperand;
   let MIOperandInfo = (ops GPR, GPR, shift_imm);
 }
+// FIXME: Does this need to be distinct from so_reg?
 def shift_so_reg : Operand<i32>,    // reg reg imm
                    ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
                                   [shl,srl,sra,rotr]> {
@@ -416,7 +447,6 @@ def so_imm : Operand<i32>, ImmLeaf<i32, [{
     return ARM_AM::getSOImmVal(Imm) != -1;
   }]> {
   let EncoderMethod = "getSOImmOpValue";
-  let PrintMethod = "printSOImmOperand";
 }
 
 // Break so_imm's up into two pieces.  This handles immediates with up to 16
@@ -434,6 +464,22 @@ def arm_i32imm : PatLeaf<(imm), [{
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]>;
 
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 8;
+}]> {
+  let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 16;
+}]> {
+  let ParserMatchClass = Imm0_15AsmOperand;
+}
+
 /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
 def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
   return Imm >= 0 && Imm < 32;
@@ -673,7 +719,7 @@ include "ARMInstrFormats.td"
 /// binop that produces a value.
 multiclass AsI1_bin_irs<bits<4> opcod, string opc,
                      InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
-                        PatFrag opnode, bit Commutable = 0> {
+                        PatFrag opnode, string baseOpc, bit Commutable = 0> {
   // The register-immediate version is re-materializable. This is useful
   // in particular for taking the address of a local.
   let isReMaterializable = 1 in {
@@ -713,6 +759,24 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
     let Inst{15-12} = Rd;
     let Inst{11-0} = shift;
   }
+
+  // Assembly aliases for optional destination operand when it's the same
+  // as the source operand.
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+     (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+                                                    so_imm:$imm, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsARM]>;
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+     (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+                                                    GPR:$Rm, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsARM]>;
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+     (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+                                                    so_reg:$shift, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsARM]>;
 }
 
 /// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
@@ -909,9 +973,9 @@ multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
 }
 
 /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
-let Uses = [CPSR] in {
 multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
-                             bit Commutable = 0> {
+                             string baseOpc, bit Commutable = 0> {
+  let Uses = [CPSR] in {
   def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
                 DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
                [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
@@ -950,7 +1014,24 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
     let Inst{15-12} = Rd;
     let Inst{19-16} = Rn;
   }
-}
+  }
+  // Assembly aliases for optional destination operand when it's the same
+  // as the source operand.
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+     (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+                                                    so_imm:$imm, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsARM]>;
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+     (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+                                                    GPR:$Rm, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsARM]>;
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+     (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+                                                    so_reg:$shift, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsARM]>;
 }
 
 // Carry setting variants
@@ -958,15 +1039,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
 let usesCustomInserter = 1 in {
 multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
   def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
-               Size4Bytes, IIC_iALUi,
+               4, IIC_iALUi,
                [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
   def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
-               Size4Bytes, IIC_iALUr,
+               4, IIC_iALUr,
                [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
     let isCommutable = Commutable;
   }
   def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-               Size4Bytes, IIC_iALUsr,
+               4, IIC_iALUsr,
                [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
 }
 }
@@ -1116,9 +1197,8 @@ def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
 
 // The i32imm operand $val can be used by a debugger to store more information
 // about the breakpoint.
-def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
-              [/* For disassembly only; pattern left blank */]>,
-           Requires<[IsARM]> {
+def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+              "bkpt", "\t$val", []>, Requires<[IsARM]> {
   bits<16> val;
   let Inst{3-0} = val{3-0};
   let Inst{19-8} = val{15-4};
@@ -1208,9 +1288,8 @@ def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
   let Inst{8-0} = 0;
 }
 
-def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
-             [/* For disassembly only; pattern left blank */]>,
-          Requires<[IsARM, HasV7]> {
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+             []>, Requires<[IsARM, HasV7]> {
   bits<4> opt;
   let Inst{27-4} = 0b001100100000111100001111;
   let Inst{3-0} = opt;
@@ -1227,40 +1306,40 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
 // Address computation and loads and stores in PIC mode.
 let isNotDuplicable = 1 in {
 def PICADD  : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
-                            Size4Bytes, IIC_iALUr,
+                            4, IIC_iALUr,
                             [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
 
 let AddedComplexity = 10 in {
 def PICLDR  : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                            Size4Bytes, IIC_iLoad_r,
+                            4, IIC_iLoad_r,
                             [(set GPR:$dst, (load addrmodepc:$addr))]>;
 
 def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
-                            Size4Bytes, IIC_iLoad_bh_r,
+                            4, IIC_iLoad_bh_r,
                             [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
 
 def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
-                            Size4Bytes, IIC_iLoad_bh_r,
+                            4, IIC_iLoad_bh_r,
                             [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
 
 def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
-                            Size4Bytes, IIC_iLoad_bh_r,
+                            4, IIC_iLoad_bh_r,
                             [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
 
 def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
-                            Size4Bytes, IIC_iLoad_bh_r,
+                            4, IIC_iLoad_bh_r,
                             [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
 }
 let AddedComplexity = 10 in {
 def PICSTR  : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-      Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+      4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
 
 def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-      Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+      4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
                                                    addrmodepc:$addr)]>;
 
 def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-      Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+      4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
 }
 } // isNotDuplicable = 1
 
@@ -1282,11 +1361,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
   let Inst{11-0} = label;
 }
 def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
-                    Size4Bytes, IIC_iALUi, []>;
+                    4, IIC_iALUi, []>;
 
 def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
                       (ins i32imm:$label, nohash_imm:$id, pred:$p),
-                      Size4Bytes, IIC_iALUi, []>;
+                      4, IIC_iALUi, []>;
 
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions.
@@ -1319,22 +1398,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
     let Inst{3-0}  = dst;
   }
 
-  // For disassembly only.
-  def BX_pred : AXI<(outs), (ins GPR:$dst, pred:$p), BrMiscFrm, IIC_Br,
-                  "bx$p\t$dst", [/* pattern left blank */]>,
+  def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+                  "bx", "\t$dst", [/* pattern left blank */]>,
               Requires<[IsARM, HasV4T]> {
     bits<4> dst;
     let Inst{27-4} = 0b000100101111111111110001;
     let Inst{3-0}  = dst;
   }
-
-  // ARMV4 only
-  // FIXME: We would really like to define this as a vanilla ARMPat like:
-  // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)>
-  // With that, however, we can't set isBranch, isTerminator, etc..
-  def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst),
-                    Size4Bytes, IIC_Br, [(brind GPR:$dst)]>,
-                    Requires<[IsARM, NoV4T]>;
 }
 
 // All calls clobber the non-callee saved registers. SP is marked as
@@ -1386,12 +1456,12 @@ let isCall = 1,
   // ARMv4T
   // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
   def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
-                   Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                   8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
                    Requires<[IsARM, HasV4T, IsNotDarwin]>;
 
   // ARMv4
   def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
-                   Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                   8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
                    Requires<[IsARM, NoV4T, IsNotDarwin]>;
 }
 
@@ -1401,131 +1471,82 @@ let isCall = 1,
   // moved above / below calls.
   Defs = [R0,  R1,  R2,  R3,  R9,  R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
   Uses = [R7, SP] in {
-  def BLr9  : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops),
-                Size4Bytes, IIC_Br,
-                [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>;
-
-  def BLr9_pred : ARMPseudoInst<(outs),
-                   (ins bltarget:$func, pred:$p, variable_ops),
-                   Size4Bytes, IIC_Br,
-                   [(ARMcall_pred tglobaladdr:$func)]>,
+  def BLr9  : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
+                4, IIC_Br,
+                [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
+              Requires<[IsARM, IsDarwin]>;
+
+  def BLr9_pred : ARMPseudoExpand<(outs),
+                   (ins bl_target:$func, pred:$p, variable_ops),
+                   4, IIC_Br,
+                   [(ARMcall_pred tglobaladdr:$func)],
+                   (BL_pred bl_target:$func, pred:$p)>,
                   Requires<[IsARM, IsDarwin]>;
 
   // ARMv5T and above
-  def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops),
-                Size4Bytes, IIC_Br,
-                [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>;
-
-  def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p,  variable_ops),
-                      Size4Bytes, IIC_Br,
-                      [(ARMcall_pred GPR:$func)]>,
+  def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
+                4, IIC_Br,
+                [(ARMcall GPR:$func)],
+                (BLX GPR:$func)>,
+               Requires<[IsARM, HasV5T, IsDarwin]>;
+
+  def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
+                4, IIC_Br,
+                [(ARMcall_pred GPR:$func)],
+                (BLX_pred GPR:$func, pred:$p)>,
                    Requires<[IsARM, HasV5T, IsDarwin]>;
 
   // ARMv4T
   // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
   def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
-                  Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                  8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
                   Requires<[IsARM, HasV4T, IsDarwin]>;
 
   // ARMv4
   def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
-                  Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+                  8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
                   Requires<[IsARM, NoV4T, IsDarwin]>;
 }
 
-// Tail calls.
-
-// FIXME: The Thumb versions of these should live in ARMInstrThumb.td
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
-  // Darwin versions.
-  let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
-      Uses = [SP] in {
-    def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
-                       IIC_Br, []>, Requires<[IsDarwin]>;
-
-    def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
-                       IIC_Br, []>, Requires<[IsDarwin]>;
-
-    def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
-                   Size4Bytes, IIC_Br,
-                   []>, Requires<[IsARM, IsDarwin]>;
-
-    def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
-                   Size4Bytes, IIC_Br,
-                   []>, Requires<[IsThumb, IsDarwin]>;
-
-    def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
-                     Size4Bytes, IIC_Br,
-                   []>, Requires<[IsARM, IsDarwin]>;
-
-    def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
-                     Size4Bytes, IIC_Br,
-                   []>, Requires<[IsThumb, IsDarwin]>;
-  }
-
-  // Non-Darwin versions (the difference is R9).
-  let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
-      Uses = [SP] in {
-    def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
-                       IIC_Br, []>, Requires<[IsNotDarwin]>;
-
-    def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
-                       IIC_Br, []>, Requires<[IsNotDarwin]>;
-
-    def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
-                   Size4Bytes, IIC_Br,
-                   []>, Requires<[IsARM, IsNotDarwin]>;
-
-    def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
-                   Size4Bytes, IIC_Br,
-                   []>, Requires<[IsThumb, IsNotDarwin]>;
-
-    def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
-                     Size4Bytes, IIC_Br,
-                   []>, Requires<[IsARM, IsNotDarwin]>;
-    def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
-                     Size4Bytes, IIC_Br,
-                   []>, Requires<[IsThumb, IsNotDarwin]>;
+let isBranch = 1, isTerminator = 1 in {
+  // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+  // a two-value operand where a dag node expects two operands. :(
+  def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+               IIC_Br, "b", "\t$target",
+               [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+    bits<24> target;
+    let Inst{23-0} = target;
   }
-}
 
-let isBranch = 1, isTerminator = 1 in {
-  // B is "predicable" since it's just a Bcc with an 'always' condition.
   let isBarrier = 1 in {
+    // B is "predicable" since it's just a Bcc with an 'always' condition.
     let isPredicable = 1 in
     // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
     // should be sufficient.
-    def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br,
-                [(br bb:$target)]>;
+    // FIXME: Is B really a Barrier? That doesn't seem right.
+    def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+                [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
 
     let isNotDuplicable = 1, isIndirectBranch = 1 in {
     def BR_JTr : ARMPseudoInst<(outs),
                       (ins GPR:$target, i32imm:$jt, i32imm:$id),
-                      SizeSpecial, IIC_Br,
+                      0, IIC_Br,
                       [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
     // FIXME: This shouldn't use the generic "addrmode2," but rather be split
     // into i12 and rs suffixed versions.
     def BR_JTm : ARMPseudoInst<(outs),
                      (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
-                     SizeSpecial, IIC_Br,
+                     0, IIC_Br,
                      [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
                        imm:$id)]>;
     def BR_JTadd : ARMPseudoInst<(outs),
                    (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
-                   SizeSpecial, IIC_Br,
+                   0, IIC_Br,
                    [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
                      imm:$id)]>;
     } // isNotDuplicable = 1, isIndirectBranch = 1
   } // isBarrier = 1
 
-  // FIXME: should be able to write a pattern for ARMBrcond, but can't use
-  // a two-value operand where a dag node expects two operands. :(
-  def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
-               IIC_Br, "b", "\t$target",
-               [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
-    bits<24> target;
-    let Inst{23-0} = target;
-  }
 }
 
 // BLX (immediate) -- for disassembly only
@@ -1538,14 +1559,65 @@ def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
   let Inst{24} = target{0};
 }
 
-// Branch and Exchange Jazelle -- for disassembly only
+// Branch and Exchange Jazelle
 def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
-              [/* For disassembly only; pattern left blank */]> {
+              [/* pattern left blank */]> {
+  bits<4> func;
   let Inst{23-20} = 0b0010;
-  //let Inst{19-8} = 0xfff;
+  let Inst{19-8} = 0xfff;
   let Inst{7-4} = 0b0010;
+  let Inst{3-0} = func;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+  // Darwin versions.
+  let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+      Uses = [SP] in {
+    def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsDarwin]>;
+
+    def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsDarwin]>;
+
+    def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+                   4, IIC_Br, [],
+                   (Bcc br_target:$dst, (ops 14, zero_reg))>,
+                   Requires<[IsARM, IsDarwin]>;
+
+    def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+                   4, IIC_Br, [],
+                   (BX GPR:$dst)>,
+                   Requires<[IsARM, IsDarwin]>;
+
+  }
+
+  // Non-Darwin versions (the difference is R9).
+  let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+      Uses = [SP] in {
+    def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+    def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+    def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
+                   4, IIC_Br, [],
+                   (Bcc br_target:$dst, (ops 14, zero_reg))>,
+                   Requires<[IsARM, IsNotDarwin]>;
+
+    def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+                     4, IIC_Br, [],
+                     (BX GPR:$dst)>,
+                     Requires<[IsARM, IsNotDarwin]>;
+  }
 }
 
+
+
+
+
 // Secure Monitor Call is a system instruction -- for disassembly only
 def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
               [/* For disassembly only; pattern left blank */]> {
@@ -1562,7 +1634,6 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
   let Inst{23-0} = svc;
 }
 }
-def : MnemonicAlias<"swi", "svc">;
 
 // Store Return State is a system instruction -- for disassembly only
 let isCodeGenOnly = 1 in {  // FIXME: This should not use submode!
@@ -1908,10 +1979,12 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
 
 multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
                          InstrItinClass itin, InstrItinClass itin_upd> {
+  // IA is the default, so no need for an explicit suffix on the
+  // mnemonic here. Without it is the canonical spelling.
   def IA :
     AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
          IndexModeNone, f, itin,
-         !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+         !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
     let Inst{24-23} = 0b01;       // Increment After
     let Inst{21}    = 0;          // No writeback
     let Inst{20}    = L_bit;
@@ -1919,7 +1992,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
   def IA_UPD :
     AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
          IndexModeUpd, f, itin_upd,
-         !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+         !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
     let Inst{24-23} = 0b01;       // Increment After
     let Inst{21}    = 1;          // Writeback
     let Inst{20}    = L_bit;
@@ -1984,17 +2057,14 @@ defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
 
 } // neverHasSideEffects
 
-// Load / Store Multiple Mnemonic Aliases
-def : MnemonicAlias<"ldm", "ldmia">;
-def : MnemonicAlias<"stm", "stmia">;
-
 // FIXME: remove when we have a way to marking a MI with these properties.
 // FIXME: Should pc be an implicit operand like PICADD, etc?
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
-                                               reglist:$regs, variable_ops),
-                     Size4Bytes, IIC_iLoad_mBr, []>,
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+                                                 reglist:$regs, variable_ops),
+                     4, IIC_iLoad_mBr, [],
+                     (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
       RegConstraint<"$Rn = $wb">;
 
 //===----------------------------------------------------------------------===//
@@ -2164,7 +2234,7 @@ defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
 
 def SBFX  : I<(outs GPR:$Rd),
               (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
                Requires<[IsARM, HasV6T2]> {
   bits<4> Rd;
@@ -2181,7 +2251,7 @@ def SBFX  : I<(outs GPR:$Rd),
 
 def UBFX  : I<(outs GPR:$Rd),
               (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
                Requires<[IsARM, HasV6T2]> {
   bits<4> Rd;
@@ -2202,10 +2272,10 @@ def UBFX  : I<(outs GPR:$Rd),
 
 defm ADD  : AsI1_bin_irs<0b0100, "add",
                          IIC_iALUi, IIC_iALUr, IIC_iALUsr,
-                         BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
+                         BinOpFrag<(add  node:$LHS, node:$RHS)>, "ADD", 1>;
 defm SUB  : AsI1_bin_irs<0b0010, "sub",
                          IIC_iALUi, IIC_iALUr, IIC_iALUsr,
-                         BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+                         BinOpFrag<(sub  node:$LHS, node:$RHS)>, "SUB">;
 
 // ADD and SUB with 's' bit set.
 defm ADDS : AI1_bin_s_irs<0b0100, "adds",
@@ -2216,9 +2286,11 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs",
                           BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
 defm ADC : AI1_adde_sube_irs<0b0101, "adc",
-                          BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>,
+                          "ADC", 1>;
 defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
-                          BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>,
+                          "SBC">;
 
 // ADC and SUBC with 's' bit set.
 let usesCustomInserter = 1 in {
@@ -2271,13 +2343,13 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
 // NOTE: CPSR def omitted because it will be handled by the custom inserter.
 let usesCustomInserter = 1 in {
 def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
-                 Size4Bytes, IIC_iALUi,
+                 4, IIC_iALUi,
                  [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
 def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
-                 Size4Bytes, IIC_iALUr,
+                 4, IIC_iALUr,
                  [/* For disassembly only; pattern left blank */]>;
 def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-                 Size4Bytes, IIC_iALUsr,
+                 4, IIC_iALUsr,
                  [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
 }
 
@@ -2325,10 +2397,10 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
 // NOTE: CPSR def omitted because it will be handled by the custom inserter.
 let usesCustomInserter = 1, Uses = [CPSR] in {
 def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
-                  Size4Bytes, IIC_iALUi,
+                  4, IIC_iALUi,
                   [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
 def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
-                  Size4Bytes, IIC_iALUsr,
+                  4, IIC_iALUsr,
                   [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
 }
 
@@ -2528,19 +2600,19 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
 
 defm AND   : AsI1_bin_irs<0b0000, "and",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
 defm ORR   : AsI1_bin_irs<0b1100, "orr",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(or  node:$LHS, node:$RHS)>, "ORR", 1>;
 defm EOR   : AsI1_bin_irs<0b0001, "eor",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+                          BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
 defm BIC   : AsI1_bin_irs<0b1110, "bic",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsr,
-                          BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+                          BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
 
 def BFC    : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "bfc", "\t$Rd, $imm", "$src = $Rd",
                [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
                Requires<[IsARM, HasV6T2]> {
@@ -2555,7 +2627,7 @@ def BFC    : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
 
 // A8.6.18  BFI - Bitfield insert (Encoding A1)
 def BFI    : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
                [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
                                 bf_inv_mask_imm:$imm))]>,
@@ -2575,7 +2647,7 @@ def BFI    : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
 let isAsmParserOnly = 1 in
 def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
                                    lsb_pos_imm:$lsb, width_imm:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
                "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
                []>, Requires<[IsARM, HasV6T2]> {
   bits<4> Rd;
@@ -2652,31 +2724,26 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
   let Inst{3-0}   = Rn;
 }
 
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+//        property. Remove them when it's possible to add those properties
+//        on an individual MachineInstr, not just an instruction description.
 let isCommutable = 1 in {
-let Constraints = "@earlyclobber $Rd" in
-def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
-                                          pred:$p, cc_out:$s),
-                          Size4Bytes, IIC_iMUL32,
-                         [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
-                        Requires<[IsARM, NoV6]>;
-
 def MUL  : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                    IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
                    [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
                    Requires<[IsARM, HasV6]> {
   let Inst{15-12} = 0b0000;
 }
-}
 
 let Constraints = "@earlyclobber $Rd" in
-def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
-                         (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
-                         Size4Bytes, IIC_iMAC32,
-                         [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
-                        Requires<[IsARM, NoV6]> {
-  bits<4> Ra;
-  let Inst{15-12} = Ra;
+def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                            pred:$p, cc_out:$s),
+                          4, IIC_iMUL32,
+                         [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
+                         (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+                        Requires<[IsARM, NoV6]>;
 }
+
 def MLA  : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
                     IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
                    [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
@@ -2685,6 +2752,14 @@ def MLA  : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
   let Inst{15-12} = Ra;
 }
 
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
+                          (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+                          4, IIC_iMAC32,
+                        [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
+                  (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
+                        Requires<[IsARM, NoV6]>;
+
 def MLS  : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
                    IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
                    [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
@@ -2700,49 +2775,34 @@ def MLS  : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
 }
 
 // Extra precision multiplies with low / high results
-
 let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
-def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
-                            Size4Bytes, IIC_iMUL64, []>,
-                           Requires<[IsARM, NoV6]>;
-
-def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
-                            Size4Bytes, IIC_iMUL64, []>,
-                           Requires<[IsARM, NoV6]>;
-}
-
 def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
-                               (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                                 (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
                     "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
                     Requires<[IsARM, HasV6]>;
 
 def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
-                               (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                                 (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
                     "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
                     Requires<[IsARM, HasV6]>;
-}
 
-// Multiply + accumulate
 let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
-def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
-                            Size4Bytes, IIC_iMAC64, []>,
-                           Requires<[IsARM, NoV6]>;
-def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
                             (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
-                            Size4Bytes, IIC_iMAC64, []>,
+                            4, IIC_iMUL64, [],
+          (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
                            Requires<[IsARM, NoV6]>;
-def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
                             (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
-                            Size4Bytes, IIC_iMAC64, []>,
+                            4, IIC_iMUL64, [],
+          (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
                            Requires<[IsARM, NoV6]>;
-
+}
 }
 
+// Multiply + accumulate
 def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
                                (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
                     "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
@@ -2765,6 +2825,25 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
   let Inst{11-8}  = Rm;
   let Inst{3-0}   = Rn;
 }
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+                              (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                              4, IIC_iMAC64, [],
+          (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+                           Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+                              (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                              4, IIC_iMAC64, [],
+          (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+                           Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+                              (ins GPR:$Rn, GPR:$Rm, pred:$p),
+                              4, IIC_iMAC64, [],
+          (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>,
+                           Requires<[IsARM, NoV6]>;
+}
+
 } // neverHasSideEffects
 
 // Most significant word multiply
@@ -3005,31 +3084,22 @@ def REV  : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
               IIC_iUNAr, "rev", "\t$Rd, $Rm",
               [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
 
+let AddedComplexity = 5 in
 def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "rev16", "\t$Rd, $Rm",
-               [(set GPR:$Rd,
-                   (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
-                       (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
-                           (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
-                               (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+               [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
                Requires<[IsARM, HasV6]>;
 
+let AddedComplexity = 5 in
 def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
                IIC_iUNAr, "revsh", "\t$Rd, $Rm",
-               [(set GPR:$Rd,
-                  (sext_inreg
-                    (or (srl GPR:$Rm, (i32 8)),
-                        (shl GPR:$Rm, (i32 8))), i16))]>,
+               [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
                Requires<[IsARM, HasV6]>;
 
-def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
-                               (shl GPR:$Rm, (i32 8))), i16),
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+                   (and (srl GPR:$Rm, (i32 8)), 0xFF)),
                (REVSH GPR:$Rm)>;
 
-// Need the AddedComplexity or else MOVs + REV would be chosen.
-let AddedComplexity = 5 in
-def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
-
 def lsl_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
   return CurDAG->getTargetConstant(Sh, MVT::i32);
@@ -3177,26 +3247,26 @@ def BCCZi64 : PseudoInst<(outs),
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
 def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
-                           Size4Bytes, IIC_iCMOVr,
+                           4, IIC_iCMOVr,
   [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
       RegConstraint<"$false = $Rd">;
 def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
                            (ins GPR:$false, so_reg:$shift, pred:$p),
-                           Size4Bytes, IIC_iCMOVsr,
+                           4, IIC_iCMOVsr,
   [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
       RegConstraint<"$false = $Rd">;
 
 let isMoveImm = 1 in
 def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
                              (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
-                             Size4Bytes, IIC_iMOVi,
+                             4, IIC_iMOVi,
                              []>,
       RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
 
 let isMoveImm = 1 in
 def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
                            (ins GPR:$false, so_imm:$imm, pred:$p),
-                           Size4Bytes, IIC_iCMOVi,
+                           4, IIC_iCMOVi,
    [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
       RegConstraint<"$false = $Rd">;
 
@@ -3204,12 +3274,12 @@ def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
 let isMoveImm = 1 in
 def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
                                 (ins GPR:$false, i32imm:$src, pred:$p),
-                  Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+                  8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
 
 let isMoveImm = 1 in
 def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
                            (ins GPR:$false, so_imm:$imm, pred:$p),
-                           Size4Bytes, IIC_iCMOVi,
+                           4, IIC_iCMOVi,
  [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
                 RegConstraint<"$false = $Rd">;
 } // neverHasSideEffects
@@ -3235,19 +3305,20 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
 }
 
 def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
-                "dsb", "\t$opt",
-                [/* For disassembly only; pattern left blank */]>,
+                "dsb", "\t$opt", []>,
                 Requires<[IsARM, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf57ff04;
   let Inst{3-0} = opt;
 }
 
-// ISB has only full system option -- for disassembly only
-def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+// ISB has only the full system option
+def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "isb", "\t$opt", []>,
                 Requires<[IsARM, HasDB]> {
+  bits<4> opt;
   let Inst{31-4} = 0xf57ff06;
-  let Inst{3-0} = 0b1111;
+  let Inst{3-0} = opt;
 }
 
 let usesCustomInserter = 1 in {
@@ -3410,8 +3481,8 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
 // Coprocessor Instructions.
 //
 
-def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
-            c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+            c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
             NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
             [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
                           imm:$CRm, imm:$opc2)]> {
@@ -3431,8 +3502,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
   let Inst{23-20} = opc1;
 }
 
-def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
-               c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+               c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
                NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
                [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
                               imm:$CRm, imm:$opc2)]> {
@@ -3455,7 +3526,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
 
 class ACI<dag oops, dag iops, string opc, string asm,
           IndexMode im = IndexModeNone>
-  : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary,
+  : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
          opc, asm, "", [/* For disassembly only; pattern left blank */]> {
   let Inst{27-25} = 0b110;
 }
@@ -3583,8 +3654,8 @@ class MovRCopro<string opc, bit direction, dag oops, dag iops,
 
 def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
                     (outs),
-                    (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
-                         c_imm:$CRm, i32imm:$opc2),
+                    (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                         c_imm:$CRm, imm0_7:$opc2),
                     [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                                   imm:$CRm, imm:$opc2)]>;
 def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
@@ -3620,8 +3691,8 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops,
 
 def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
                       (outs),
-                      (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
-                           c_imm:$CRm, i32imm:$opc2),
+                      (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                           c_imm:$CRm, imm0_7:$opc2),
                       [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                                      imm:$CRm, imm:$opc2)]>;
 def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
@@ -3635,7 +3706,7 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
 
 class MovRRCopro<string opc, bit direction,
                  list<dag> pattern = [/* For disassembly only */]>
-  : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+  : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
         GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
         NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
   let Inst{23-21} = 0b010;
@@ -3661,7 +3732,7 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
 
 class MovRRCopro2<string opc, bit direction,
                   list<dag> pattern = [/* For disassembly only */]>
-  : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+  : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
          GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
          !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
   let Inst{31-28} = 0b1111;
@@ -3812,6 +3883,13 @@ def Int_eh_sjlj_dispatchsetup :
 // Non-Instruction Patterns
 //
 
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+  def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+                    4, IIC_Br, [(brind GPR:$dst)],
+                    (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+                  Requires<[IsARM, NoV4T]>;
+
 // Large immediate handling.
 
 // 32-bit immediate using two piece so_imms or movw + movt.
@@ -3977,3 +4055,22 @@ include "ARMInstrVFP.td"
 
 include "ARMInstrNEON.td"
 
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 79d95d9b2683..0df62f456343 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -175,7 +175,7 @@ class VLDQQWBPseudo<InstrItinClass itin>
                 (ins addrmode6:$addr, am6offset:$offset), itin,
                 "$addr.addr = $wb">;
 class VLDQQQQPseudo<InstrItinClass itin>
-  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">;
+  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">;
 class VLDQQQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
@@ -1387,7 +1387,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
   : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
           IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
-          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]> {
+          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
   let Rm = 0b1111;
 }
 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
@@ -3793,7 +3793,8 @@ def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                      N3RegFrm, IIC_VCNTiD,
                      "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
-                     [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+                     [(set DPR:$Vd,
+                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
 
 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                      (and DPR:$Vm, (vnotd DPR:$Vd)))),
@@ -3803,7 +3804,8 @@ def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                      N3RegFrm, IIC_VCNTiQ,
                      "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
-                     [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+                     [(set QPR:$Vd,
+                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
 
 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                      (and QPR:$Vm, (vnotq QPR:$Vd)))),
@@ -4212,17 +4214,12 @@ def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
 // Vector Move Operations.
 
 //   VMOV     : Vector Move (Register)
+def : InstAlias<"vmov${p} $Vd, $Vm",
+                (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : InstAlias<"vmov${p} $Vd, $Vm",
+                (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
 
 let neverHasSideEffects = 1 in {
-def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
-                     N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
-  let Vn{4-0} = Vm{4-0};
-}
-def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
-                     N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
-  let Vn{4-0} = Vm{4-0};
-}
-
 // Pseudo vector move instructions for QQ and QQQQ registers. This should
 // be expanded after register allocation is completed.
 def  VMOVQQ   : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
@@ -4702,11 +4699,10 @@ def VEXTd32 : VEXTd<"vext", "32", v2i32> {
   let Inst{11-10} = index{1-0};
   let Inst{9-8}    = 0b00;
 }
-def VEXTdf  : VEXTd<"vext", "32", v2f32> {
-  let Inst{11-10}    = index{1-0};
-  let Inst{9-8}  = 0b00;
-
-}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+                           (v2f32 DPR:$Vm),
+                           (i32 imm:$index))),
+          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
 
 def VEXTq8  : VEXTq<"vext", "8",  v16i8> {
   let Inst{11-8} = index{3-0};
@@ -4719,10 +4715,10 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> {
   let Inst{11-10} = index{1-0};
   let Inst{9-8}    = 0b00;
 }
-def VEXTqf  : VEXTq<"vext", "32", v4f32> {
-  let Inst{11-10} = index{1-0};
-  let Inst{9-8}   = 0b00;
-}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+                           (v4f32 QPR:$Vm),
+                           (i32 imm:$index))),
+          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
 
 //   VTRN     : Vector Transpose
 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 4777189934c0..bfe83eceb13f 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -26,17 +26,14 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
 }]>;
 
-/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
-def imm0_7 : ImmLeaf<i32, [{
-  return Imm >= 0 && Imm < 8;
-}]>;
 def imm0_7_neg : PatLeaf<(i32 imm), [{
   return (uint32_t)-N->getZExtValue() < 8;
 }], imm_neg_XFORM>;
 
-def imm0_255 : ImmLeaf<i32, [{
-  return Imm >= 0 && Imm < 256;
-}]>;
+def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+  let ParserMatchClass = imm0_255_asmoperand;
+}
 def imm0_255_comp : PatLeaf<(i32 imm), [{
   return ~((uint32_t)N->getZExtValue()) < 256;
 }]>;
@@ -74,10 +71,12 @@ def t_adrlabel : Operand<i32> {
 // Scaled 4 immediate.
 def t_imm_s4 : Operand<i32> {
   let PrintMethod = "printThumbS4ImmOperand";
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 
 // Define Thumb specific addressing modes.
 
+let OperandType = "OPERAND_PCREL" in {
 def t_brtarget : Operand<OtherVT> {
   let EncoderMethod = "getThumbBRTargetOpValue";
 }
@@ -97,6 +96,7 @@ def t_bltarget : Operand<i32> {
 def t_blxtarget : Operand<i32> {
   let EncoderMethod = "getThumbBLXTargetOpValue";
 }
+}
 
 def MemModeRegThumbAsmOperand : AsmOperandClass {
   let Name = "MemModeRegThumb";
@@ -360,27 +360,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
 //  Control Flow Instructions.
 //
 
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
-  def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr",
-                   [(ARMretflag)]>,
-                T1Special<{1,1,0,?}> {
-    // A6.2.3 & A8.6.25
-    let Inst{6-3} = 0b1110; // Rm = lr
-    let Inst{2-0} = 0b000;
-  }
-
-  // Alternative return instruction used by vararg functions.
-  def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
-                          IIC_Br, "bx\t$Rm",
-                          []>,
-                       T1Special<{1,1,0,?}> {
-    // A6.2.3 & A8.6.25
-    bits<4> Rm;
-    let Inst{6-3} = Rm;
-    let Inst{2-0} = 0b000;
-  }
-}
-
 // Indirect branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
   def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
@@ -390,31 +369,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
     let Inst{6-3} = Rm;
     let Inst{2-0} = 0b000;
   }
-
-  def tBRIND : TI<(outs), (ins GPR:$Rm),
-                  IIC_Br,
-                  "mov\tpc, $Rm",
-                  [(brind GPR:$Rm)]>,
-               T1Special<{1,0,?,?}> {
-    // A8.6.97
-    bits<4> Rm;
-    let Inst{7}   = 1;          // <Rd> = Inst{7:2-0} = pc
-    let Inst{6-3} = Rm;
-    let Inst{2-0} = 0b111;
-  }
 }
 
-// FIXME: remove when we have a way to marking a MI with these properties.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
-    hasExtraDefRegAllocReq = 1 in
-def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
-                   IIC_iPop_Br,
-                   "pop${p}\t$regs", []>,
-               T1Misc<{1,1,0,?,?,?,?}> {
-  // A8.6.121
-  bits<16> regs;
-  let Inst{8}   = regs{15};     // registers = P:'0000000':register_list
-  let Inst{7-0} = regs{7-0};
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+  def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+                   [(ARMretflag)], (tBX LR, pred:$p)>;
+
+  // Alternative return instruction used by vararg functions.
+  def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+                   2, IIC_Br, [],
+                   (tBX GPR:$Rm, pred:$p)>;
 }
 
 // All calls clobber the non-callee saved registers. SP is marked as a use to
@@ -464,7 +428,7 @@ let isCall = 1,
 
   // ARMv4T
   def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
-                  Size4Bytes, IIC_Br,
+                  4, IIC_Br,
                   [(ARMcall_nolink tGPR:$func)]>,
             Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
 }
@@ -516,7 +480,7 @@ let isCall = 1,
 
   // ARMv4T
   def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
-                   Size4Bytes, IIC_Br,
+                   4, IIC_Br,
                    [(ARMcall_nolink tGPR:$func)]>,
               Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
 }
@@ -534,12 +498,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
   // Just a pseudo for a tBL instruction. Needed to let regalloc know about
   // the clobber of LR.
   let Defs = [LR] in
-  def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target),
-                          Size4Bytes, IIC_Br, []>;
+  def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target),
+                          4, IIC_Br, [], (tBL t_bltarget:$target)>;
 
   def tBR_JTr : tPseudoInst<(outs),
                       (ins tGPR:$target, i32imm:$jt, i32imm:$id),
-                      SizeSpecial, IIC_Br,
+                      0, IIC_Br,
                       [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
     list<Predicate> Predicates = [IsThumb, IsThumb1Only];
   }
@@ -583,6 +547,33 @@ let isBranch = 1, isTerminator = 1 in {
   }
 }
 
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+  // Darwin versions.
+  let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+      Uses = [SP] in {
+    // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
+    // on Darwin), so it's in ARMInstrThumb2.td.
+    def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+                     4, IIC_Br, [],
+                     (tBX GPR:$dst, (ops 14, zero_reg))>,
+                     Requires<[IsThumb, IsDarwin]>;
+  }
+  // Non-Darwin versions (the difference is R9).
+  let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+      Uses = [SP] in {
+    def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops),
+                   4, IIC_Br, [],
+                   (tB t_brtarget:$dst)>,
+                 Requires<[IsThumb, IsNotDarwin]>;
+    def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+                     4, IIC_Br, [],
+                     (tBX GPR:$dst, (ops 14, zero_reg))>,
+                     Requires<[IsThumb, IsNotDarwin]>;
+  }
+}
+
+
 // A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
 // A8.6.16 B: Encoding T1
 // If Inst{11-8} == 0b1111 then SEE SVC
@@ -685,19 +676,6 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
   let Inst{7-0} = addr;
 }
 
-// Special instruction for restore. It cannot clobber condition register
-// when it's expanded by eliminateCallFramePseudoInstr().
-let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
-// FIXME: Pseudo for tLDRspi
-def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
-                     "ldr", "\t$dst, $addr", []>,
-               T1LdStSP<{1,?,?}> {
-  bits<3> Rt;
-  bits<8> addr;
-  let Inst{10-8} = Rt;
-  let Inst{7-0} = addr;
-}
-
 // Load tconstpool
 // FIXME: Use ldr.n to work around a Darwin assembler bug.
 let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -739,9 +717,9 @@ defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
 
 // A8.6.207 & A8.6.205
 defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
-                                t_addrmode_is2, AddrModeT1_2,
-                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
-                                BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+                               t_addrmode_is2, AddrModeT1_2,
+                               IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+                               BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
 
 
 def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
@@ -754,19 +732,6 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
   let Inst{7-0} = addr;
 }
 
-let mayStore = 1, neverHasSideEffects = 1 in
-// Special instruction for spill. It cannot clobber condition register when it's
-// expanded by eliminateCallFramePseudoInstr().
-// FIXME: Pseudo for tSTRspi
-def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
-                  "str", "\t$src, $addr", []>,
-             T1LdStSP<{0,?,?}> {
-  bits<3> Rt;
-  bits<8> addr;
-  let Inst{10-8} = Rt;
-  let Inst{7-0} = addr;
-}
-
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //
@@ -911,7 +876,8 @@ def tADC :                      // A8.6.2
 
 // Add immediate
 def tADDi3 :                    // A8.6.4 T1
-  T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi,
+  T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+                   IIC_iALUi,
                    "add", "\t$Rd, $Rm, $imm3",
                    [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
   bits<3> imm3;
@@ -1071,7 +1037,7 @@ def tLSRrr :                    // A8.6.91
 
 // Move register
 let isMoveImm = 1 in
-def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
                   "mov", "\t$Rd, $imm8",
                   [(set tGPR:$Rd, imm0_255:$imm8)]>,
              T1General<{1,0,0,?,?}> {
@@ -1082,18 +1048,18 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
   let Inst{7-0}  = imm8;
 }
 
-// TODO: A7-73: MOV(2) - mov setting flag.
+// A7-73: MOV(2) - mov setting flag.
 
 let neverHasSideEffects = 1 in {
-// FIXME: Make this predicable.
-def tMOVr       : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
-                      "mov\t$Rd, $Rm", []>,
-                  T1Special<0b1000> {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+                      2, IIC_iMOVr,
+                      "mov", "\t$Rd, $Rm", "", []>,
+                  T1Special<{1,0,?,?}> {
   // A8.6.97
   bits<4> Rd;
   bits<4> Rm;
-  // Bits {7-6} are encoded by the T1Special value.
-  let Inst{5-3} = Rm{2-0};
+  let Inst{7}   = Rd{3};
+  let Inst{6-3} = Rm;
   let Inst{2-0} = Rd{2-0};
 }
 let Defs = [CPSR] in
@@ -1106,39 +1072,6 @@ def tMOVSr      : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
   let Inst{5-3}  = Rm;
   let Inst{2-0}  = Rd;
 }
-
-// FIXME: Make these predicable.
-def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
-                       "mov\t$Rd, $Rm", []>,
-                   T1Special<{1,0,0,?}> {
-  // A8.6.97
-  bits<4> Rd;
-  bits<4> Rm;
-  // Bit {7} is encoded by the T1Special value.
-  let Inst{6-3} = Rm;
-  let Inst{2-0} = Rd{2-0};
-}
-def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
-                       "mov\t$Rd, $Rm", []>,
-                   T1Special<{1,0,?,0}> {
-  // A8.6.97
-  bits<4> Rd;
-  bits<4> Rm;
-  // Bit {6} is encoded by the T1Special value.
-  let Inst{7}   = Rd{3};
-  let Inst{5-3} = Rm{2-0};
-  let Inst{2-0} = Rd{2-0};
-}
-def tMOVgpr2gpr  : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
-                       "mov\t$Rd, $Rm", []>,
-                   T1Special<{1,0,?,?}> {
-  // A8.6.97
-  bits<4> Rd;
-  bits<4> Rm;
-  let Inst{7}   = Rd{3};
-  let Inst{6-3} = Rm;
-  let Inst{2-0} = Rd{2-0};
-}
 } // neverHasSideEffects
 
 // Multiply register
@@ -1175,31 +1108,16 @@ def tREV16 :                    // A8.6.135
   T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
                  IIC_iUNAr,
                  "rev16", "\t$Rd, $Rm",
-             [(set tGPR:$Rd,
-                   (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
-                       (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
-                           (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
-                               (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+             [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
                 Requires<[IsThumb, IsThumb1Only, HasV6]>;
 
 def tREVSH :                    // A8.6.136
   T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
                  IIC_iUNAr,
                  "revsh", "\t$Rd, $Rm",
-                 [(set tGPR:$Rd,
-                       (sext_inreg
-                         (or (srl tGPR:$Rm, (i32 8)),
-                             (shl tGPR:$Rm, (i32 8))), i16))]>,
+                 [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
                  Requires<[IsThumb, IsThumb1Only, HasV6]>;
 
-def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
-                            (shl tGPR:$Rm, (i32 8))), i16),
-            (tREVSH tGPR:$Rm)>,
-      Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
-def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>,
-      Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
 // Rotate right register
 def tROR :                      // A8.6.139
   T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1294,31 +1212,6 @@ let usesCustomInserter = 1 in  // Expanded after instruction selection.
               NoItinerary,
              [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
 
-
-// 16-bit movcc in IT blocks for Thumb2.
-let neverHasSideEffects = 1 in {
-def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr,
-                    "mov", "\t$Rdn, $Rm", []>,
-              T1Special<{1,0,?,?}> {
-  bits<4> Rdn;
-  bits<4> Rm;
-  let Inst{7}   = Rdn{3};
-  let Inst{6-3} = Rm;
-  let Inst{2-0} = Rdn{2-0};
-}
-
-let isMoveImm = 1 in
-def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi,
-                    "mov", "\t$Rdn, $Rm", []>,
-              T1General<{1,0,0,?,?}> {
-  bits<3> Rdn;
-  bits<8> Rm;
-  let Inst{10-8} = Rdn;
-  let Inst{7-0}  = Rm;
-}
-
-} // neverHasSideEffects
-
 // tLEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
 
@@ -1333,118 +1226,22 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
 
 let neverHasSideEffects = 1, isReMaterializable = 1 in
 def tLEApcrel   : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
-                              Size2Bytes, IIC_iALUi, []>;
+                              2, IIC_iALUi, []>;
 
 def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
                               (ins i32imm:$label, nohash_imm:$id, pred:$p),
-                              Size2Bytes, IIC_iALUi, []>;
-
-//===----------------------------------------------------------------------===//
-// Move between coprocessor and ARM core register -- for disassembly only
-//
-
-class tMovRCopro<string opc, bit direction, dag oops, dag iops,
-                 list<dag> pattern>
-  : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
-          pattern> {
-  let Inst{27-24} = 0b1110;
-  let Inst{20} = direction;
-  let Inst{4} = 1;
-
-  bits<4> Rt;
-  bits<4> cop;
-  bits<3> opc1;
-  bits<3> opc2;
-  bits<4> CRm;
-  bits<4> CRn;
-
-  let Inst{15-12} = Rt;
-  let Inst{11-8}  = cop;
-  let Inst{23-21} = opc1;
-  let Inst{7-5}   = opc2;
-  let Inst{3-0}   = CRm;
-  let Inst{19-16} = CRn;
-}
-
-def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
-           (outs),
-           (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
-                c_imm:$CRm, i32imm:$opc2),
-           [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                         imm:$CRm, imm:$opc2)]>;
-def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
-           (outs GPR:$Rt),
-           (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
-           []>;
-
-def : Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
-          (tMRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>,
-          Requires<[IsThumb, HasV6T2]>;
-
-class tMovRRCopro<string opc, bit direction,
-                  list<dag> pattern = [/* For disassembly only */]>
-  : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
-          !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
-  let Inst{27-24} = 0b1100;
-  let Inst{23-21} = 0b010;
-  let Inst{20} = direction;
-
-  bits<4> Rt;
-  bits<4> Rt2;
-  bits<4> cop;
-  bits<4> opc1;
-  bits<4> CRm;
-
-  let Inst{15-12} = Rt;
-  let Inst{19-16} = Rt2;
-  let Inst{11-8}  = cop;
-  let Inst{7-4}   = opc1;
-  let Inst{3-0}   = CRm;
-}
-
-def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
-                        [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
-                                       imm:$CRm)]>;
-def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
-
-//===----------------------------------------------------------------------===//
-// Other Coprocessor Instructions.  For disassembly only.
-//
-def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
-                 c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
-                 "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
-                 [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                               imm:$CRm, imm:$opc2)]> {
-  let Inst{27-24} = 0b1110;
-
-  bits<4> opc1;
-  bits<4> CRn;
-  bits<4> CRd;
-  bits<4> cop;
-  bits<3> opc2;
-  bits<4> CRm;
-
-  let Inst{3-0}   = CRm;
-  let Inst{4}     = 0;
-  let Inst{7-5}   = opc2;
-  let Inst{11-8}  = cop;
-  let Inst{15-12} = CRd;
-  let Inst{19-16} = CRn;
-  let Inst{23-20} = opc1;
-}
+                              2, IIC_iALUi, []>;
 
 //===----------------------------------------------------------------------===//
 // TLS Instructions
 //
 
 // __aeabi_read_tp preserves the registers r1-r3.
-let isCall = 1, Defs = [R0, LR], Uses = [SP] in
-def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
-                   "bl\t__aeabi_read_tp",
-                   [(set R0, ARMthread_pointer)]> {
-  // Encoding is 0xf7fffffe.
-  let Inst = 0xf7fffffe;
-}
+// This is a pseudo instruction so that we can get the encoding right,
+// complete with the fixup for the __aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+                          [(set R0, ARMthread_pointer)]>;
 
 //===----------------------------------------------------------------------===//
 // SJLJ Exception handling intrinsics
@@ -1463,14 +1260,14 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
 let Defs = [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12, CPSR ],
     hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
 def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
-                                  AddrModeNone, SizeSpecial, NoItinerary, "","",
+                                  AddrModeNone, 0, NoItinerary, "","",
                           [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
 
 // FIXME: Non-Darwin version(s)
 let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
     Defs = [ R7, LR, SP ] in
 def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
-                              AddrModeNone, SizeSpecial, IndexModeNone,
+                              AddrModeNone, 0, IndexModeNone,
                               Pseudo, NoItinerary, "", "",
                               [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
                              Requires<[IsThumb, IsDarwin]>;
@@ -1583,3 +1380,18 @@ def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
                [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                            imm:$cp))]>,
                Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: Remove when we have a way to mark an MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+    hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+                           2, IIC_iPop_Br, [],
+                           (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+  def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+                  2, IIC_Br, [(brind GPR:$Rm)],
+                  (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 598660c69fbe..c2c6cbcac0f5 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -44,9 +44,11 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
 // t2_so_imm - Match a 32-bit immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
 // immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
 def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
     return ARM_AM::getT2SOImmVal(Imm) != -1;
   }]> {
+  let ParserMatchClass = t2_so_imm_asmoperand;
   let EncoderMethod = "getT2SOImmOpValue";
 }
 
@@ -463,7 +465,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
 /// changed to modify CPSR.
 multiclass T2I_bin_irs<bits<4> opcod, string opc,
                      InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
-                       PatFrag opnode, bit Commutable = 0, string wide = ""> {
+                       PatFrag opnode, string baseOpc, bit Commutable = 0,
+                       string wide = ""> {
    // shifted imm
    def ri : T2sTwoRegImm<
                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
@@ -495,14 +498,31 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
    }
+  // Assembly aliases for optional destination operand when it's the same
+  // as the source operand.
+  def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+     (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+                                                    t2_so_imm:$imm, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsThumb2]>;
+  def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+     (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+                                                    rGPR:$Rm, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsThumb2]>;
+  def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+     (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+                                                    t2_so_reg:$shift, pred:$p,
+                                                    cc_out:$s)>,
+     Requires<[IsThumb2]>;
 }
 
 /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
-//  the ".w" prefix to indicate that they are wide.
+//  the ".w" suffix to indicate that they are wide.
 multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
                      InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
-                         PatFrag opnode, bit Commutable = 0> :
-    T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">;
+                         PatFrag opnode, string baseOpc, bit Commutable = 0> :
+    T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">;
 
 /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
 /// reversed.  The 'rr' form is only defined for the disassembler; for codegen
@@ -696,18 +716,18 @@ let usesCustomInserter = 1 in {
 multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
    // shifted imm
    def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
-                Size4Bytes, IIC_iALUi,
+                4, IIC_iALUi,
                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
    // register
    def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
-                Size4Bytes, IIC_iALUr,
+                4, IIC_iALUr,
                 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
      let isCommutable = Commutable;
    }
    // shifted register
    def rs : t2PseudoInst<
                 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
-                Size4Bytes, IIC_iALUsi,
+                4, IIC_iALUsi,
                 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
 }
 }
@@ -1018,7 +1038,8 @@ multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
 // supported yet.
 multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
   def r     : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
-                  opc, "\t$Rd, $Rm", []> {
+                  opc, "\t$Rd, $Rm", []>,
+          Requires<[IsThumb2, HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -1028,7 +1049,8 @@ multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
      let Inst{5-4} = 0b00; // rotate
    }
   def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
-                  opc, "\t$Rd, $Rm, ror $rot", []> {
+                  opc, "\t$Rd, $Rm, ror $rot", []>,
+          Requires<[IsThumb2, HasT2ExtractPack]> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
      let Inst{22-20} = opcod;
@@ -1084,7 +1106,7 @@ multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
      let Inst{7} = 1;
      let Inst{5-4} = 0b00; // rotate
    }
-  def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+  def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
                   IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
      let Inst{31-27} = 0b11111;
      let Inst{26-23} = 0b0100;
@@ -1142,93 +1164,13 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
 
 let neverHasSideEffects = 1, isReMaterializable = 1 in
 def t2LEApcrel   : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
-                                Size4Bytes, IIC_iALUi, []>;
+                                4, IIC_iALUi, []>;
 def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
                                 (ins i32imm:$label, nohash_imm:$id, pred:$p),
-                                Size4Bytes, IIC_iALUi,
+                                4, IIC_iALUi,
                                 []>;
 
 
-// FIXME: None of these add/sub SP special instructions should be necessary
-// at all for thumb2 since they use the same encodings as the generic
-// add/sub instructions. In thumb1 we need them since they have dedicated
-// encodings. At the least, they should be pseudo instructions.
-// ADD r, sp, {so_imm|i12}
-let isCodeGenOnly = 1 in {
-def t2ADDrSPi   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
-                        IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25} = 0;
-  let Inst{24-21} = 0b1000;
-  let Inst{15} = 0;
-}
-def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
-                       IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25-20} = 0b100000;
-  let Inst{15} = 0;
-}
-
-// ADD r, sp, so_reg
-def t2ADDrSPs   : T2sTwoRegShiftedReg<
-                        (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
-                        IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b01;
-  let Inst{24-21} = 0b1000;
-  let Inst{15} = 0;
-}
-
-// SUB r, sp, {so_imm|i12}
-def t2SUBrSPi   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
-                        IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25} = 0;
-  let Inst{24-21} = 0b1101;
-  let Inst{15} = 0;
-}
-def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
-                       IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25-20} = 0b101010;
-  let Inst{15} = 0;
-}
-
-// SUB r, sp, so_reg
-def t2SUBrSPs   : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
-                       IIC_iALUsi,
-                       "sub", "\t$Rd, $Rn, $imm", []> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b01;
-  let Inst{24-21} = 0b1101;
-  let Inst{19-16} = 0b1101; // Rn = sp
-  let Inst{15} = 0;
-}
-} // end isCodeGenOnly = 1
-
-// Signed and unsigned division on v7-M
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
-                 "sdiv", "\t$Rd, $Rn, $Rm",
-                 [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
-                 Requires<[HasDivide, IsThumb2]> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-21} = 0b011100;
-  let Inst{20} = 0b1;
-  let Inst{15-12} = 0b1111;
-  let Inst{7-4} = 0b1111;
-}
-
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
-                 "udiv", "\t$Rd, $Rn, $Rm",
-                 [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
-                 Requires<[HasDivide, IsThumb2]> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-21} = 0b011101;
-  let Inst{20} = 0b1;
-  let Inst{15-12} = 0b1111;
-  let Inst{7-4} = 0b1111;
-}
-
 //===----------------------------------------------------------------------===//
 //  Load / store Instructions.
 //
@@ -1668,6 +1610,10 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
   let Inst{15} = 0;
 }
 
+def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+                                                 pred:$p, cc_out:$s)>,
+                Requires<[IsThumb2]>;
+
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
 def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
                    "movw", "\t$Rd, $imm",
@@ -1788,8 +1734,10 @@ defm t2ADC  : T2I_adde_sube_irs<0b1010, "adc",
                           BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
 defm t2SBC  : T2I_adde_sube_irs<0b1011, "sbc",
                           BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
-defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS,
+                                                             node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS,
+                                                             node:$RHS)>>;
 
 // RSB
 defm t2RSB  : T2I_rbin_irs  <0b1110, "rsb",
@@ -1833,7 +1781,8 @@ def : T2Pat<(adde_live_carry       rGPR:$src, t2_so_imm_not:$imm),
 // Select Bytes -- for disassembly only
 
 def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
-                NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> {
+                NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-24} = 0b010;
   let Inst{23} = 0b1;
@@ -1849,7 +1798,8 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
               list<dag> pat = [/* For disassembly only; pattern left blank */],
               dag iops = (ins rGPR:$Rn, rGPR:$Rm),
               string asm = "\t$Rd, $Rn, $Rm">
-  : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> {
+  : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+    Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0101;
   let Inst{22-20} = op22_20;
@@ -1947,12 +1897,14 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
 
 def t2USAD8   : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
                                            (ins rGPR:$Rn, rGPR:$Rm),
-                        NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> {
+                        NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{15-12} = 0b1111;
 }
 def t2USADA8  : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
                        (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
-                        "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>;
+                        "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 
 // Signed/Unsigned saturate -- for disassembly only
 
@@ -1985,7 +1937,8 @@ def t2SSAT: T2SatI<
 def t2SSAT16: T2SatI<
                 (outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary,
                 "ssat16", "\t$Rd, $sat_imm, $Rn",
-                [/* For disassembly only; pattern left blank */]> {
+                [/* For disassembly only; pattern left blank */]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11110;
   let Inst{25-22} = 0b1100;
   let Inst{20} = 0;
@@ -2005,10 +1958,11 @@ def t2USAT: T2SatI<
   let Inst{15} = 0;
 }
 
-def t2USAT16: T2SatI<
-                    (outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
-                   "usat16", "\t$dst, $sat_imm, $Rn",
-                   [/* For disassembly only; pattern left blank */]> {
+def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn),
+                     NoItinerary,
+                     "usat16", "\t$dst, $sat_imm, $Rn",
+                     [/* For disassembly only; pattern left blank */]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11110;
   let Inst{25-22} = 0b1110;
   let Inst{20} = 0;
@@ -2084,17 +2038,18 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
 
 defm t2AND  : T2I_bin_w_irs<0b0000, "and",
                             IIC_iBITi, IIC_iBITr, IIC_iBITsi,
-                            BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+                            BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
 defm t2ORR  : T2I_bin_w_irs<0b0010, "orr",
                             IIC_iBITi, IIC_iBITr, IIC_iBITsi,
-                            BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
+                            BinOpFrag<(or  node:$LHS, node:$RHS)>, "t2ORR", 1>;
 defm t2EOR  : T2I_bin_w_irs<0b0100, "eor",
                             IIC_iBITi, IIC_iBITr, IIC_iBITsi,
-                            BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+                            BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
 
 defm t2BIC  : T2I_bin_w_irs<0b0001, "bic",
                             IIC_iBITi, IIC_iBITr, IIC_iBITsi,
-                            BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+                            BinOpFrag<(and node:$LHS, (not node:$RHS))>,
+                            "t2BIC">;
 
 class T2BitFI<dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
@@ -2194,7 +2149,8 @@ let Constraints = "$src = $Rd" in {
 
 defm t2ORN  : T2I_bin_irs<0b0011, "orn",
                           IIC_iBITi, IIC_iBITr, IIC_iBITsi,
-                          BinOpFrag<(or  node:$LHS, (not node:$RHS))>, 0, "">;
+                          BinOpFrag<(or  node:$LHS, (not node:$RHS))>,
+                          "t2ORN", 0, "">;
 
 // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
 let AddedComplexity = 1 in
@@ -2277,7 +2233,8 @@ def t2UMLAL : T2MulLong<0b110, 0b0000,
 def t2UMAAL : T2MulLong<0b110, 0b0110,
                   (outs rGPR:$RdLo, rGPR:$RdHi),
                   (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
-                  "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+                  "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 } // neverHasSideEffects
 
 // Rounding variants of the below included for disassembly only
@@ -2285,7 +2242,8 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
 // Most significant word multiply
 def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
                   "smmul", "\t$Rd, $Rn, $Rm",
-                  [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> {
+                  [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
@@ -2294,7 +2252,8 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
 }
 
 def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
-                  "smmulr", "\t$Rd, $Rn, $Rm", []> {
+                  "smmulr", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
@@ -2305,7 +2264,8 @@ def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
 def t2SMMLA : T2FourReg<
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
                 "smmla", "\t$Rd, $Rn, $Rm, $Ra",
-                [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> {
+                [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
@@ -2314,7 +2274,8 @@ def t2SMMLA : T2FourReg<
 
 def t2SMMLAR: T2FourReg<
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
-                  "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> {
+                  "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b101;
@@ -2324,7 +2285,8 @@ def t2SMMLAR: T2FourReg<
 def t2SMMLS: T2FourReg<
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
                 "smmls", "\t$Rd, $Rn, $Rm, $Ra",
-                [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> {
+                [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b110;
@@ -2333,7 +2295,8 @@ def t2SMMLS: T2FourReg<
 
 def t2SMMLSR:T2FourReg<
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
-                "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> {
+                "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{31-27} = 0b11111;
   let Inst{26-23} = 0b0110;
   let Inst{22-20} = 0b110;
@@ -2344,7 +2307,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
   def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
               !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
               [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
-                                      (sext_inreg rGPR:$Rm, i16)))]> {
+                                      (sext_inreg rGPR:$Rm, i16)))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2356,7 +2320,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
   def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
               !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
               [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
-                                      (sra rGPR:$Rm, (i32 16))))]> {
+                                      (sra rGPR:$Rm, (i32 16))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2368,7 +2333,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
   def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
               !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
               [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
-                                      (sext_inreg rGPR:$Rm, i16)))]> {
+                                      (sext_inreg rGPR:$Rm, i16)))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2380,7 +2346,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
   def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
               !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
               [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
-                                      (sra rGPR:$Rm, (i32 16))))]> {
+                                      (sra rGPR:$Rm, (i32 16))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2392,7 +2359,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
   def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
               !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
               [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
-                                    (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> {
+                                    (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
@@ -2404,7 +2372,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
   def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
               !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
               [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
-                                    (sra rGPR:$Rm, (i32 16))), (i32 16)))]> {
+                                    (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
@@ -2421,7 +2390,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
               !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
               [(set rGPR:$Rd, (add rGPR:$Ra,
                                (opnode (sext_inreg rGPR:$Rn, i16),
-                                       (sext_inreg rGPR:$Rm, i16))))]> {
+                                       (sext_inreg rGPR:$Rm, i16))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2433,7 +2403,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
        (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
              !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
              [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
-                                                 (sra rGPR:$Rm, (i32 16)))))]> {
+                                                 (sra rGPR:$Rm, (i32 16)))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2445,7 +2416,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
               !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
               [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
-                                               (sext_inreg rGPR:$Rm, i16))))]> {
+                                               (sext_inreg rGPR:$Rm, i16))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2457,7 +2429,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
               !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
              [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
-                                                 (sra rGPR:$Rm, (i32 16)))))]> {
+                                                 (sra rGPR:$Rm, (i32 16)))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b001;
@@ -2469,7 +2442,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
               !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
               [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
-                                    (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> {
+                                    (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
@@ -2481,7 +2455,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
         (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
               !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
               [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
-                                      (sra rGPR:$Rm, (i32 16))), (i32 16))))]> {
+                                      (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0110;
     let Inst{22-20} = 0b011;
@@ -2496,66 +2471,108 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 // Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
 def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
          (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
-           [/* For disassembly only; pattern left blank */]>;
+           [/* For disassembly only; pattern left blank */]>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
          (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
-           [/* For disassembly only; pattern left blank */]>;
+           [/* For disassembly only; pattern left blank */]>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
          (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
-           [/* For disassembly only; pattern left blank */]>;
+           [/* For disassembly only; pattern left blank */]>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
          (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
-           [/* For disassembly only; pattern left blank */]>;
+           [/* For disassembly only; pattern left blank */]>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 
 // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
 // These are for disassembly only.
 
 def t2SMUAD: T2ThreeReg_mac<
             0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
-            IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> {
+            IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{15-12} = 0b1111;
 }
 def t2SMUADX:T2ThreeReg_mac<
             0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
-            IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> {
+            IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{15-12} = 0b1111;
 }
 def t2SMUSD: T2ThreeReg_mac<
             0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
-            IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> {
+            IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{15-12} = 0b1111;
 }
 def t2SMUSDX:T2ThreeReg_mac<
             0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
-            IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> {
+            IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+          Requires<[IsThumb2, HasThumb2DSP]> {
   let Inst{15-12} = 0b1111;
 }
 def t2SMLAD   : T2ThreeReg_mac<
             0, 0b010, 0b0000, (outs rGPR:$Rd),
             (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
-            "\t$Rd, $Rn, $Rm, $Ra", []>;
+            "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLADX  : T2FourReg_mac<
             0, 0b010, 0b0001, (outs rGPR:$Rd),
             (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
-            "\t$Rd, $Rn, $Rm, $Ra", []>;
+            "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLSD   : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
             (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
-            "\t$Rd, $Rn, $Rm, $Ra", []>;
+            "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLSDX  : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
             (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
-            "\t$Rd, $Rn, $Rm, $Ra", []>;
+            "\t$Rd, $Rn, $Rm, $Ra", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLALD  : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
                         (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
-                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+                        "\t$Ra, $Rd, $Rm, $Rn", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
                         (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
-                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+                        "\t$Ra, $Rd, $Rm, $Rn", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLSLD  : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
                         (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
-                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+                        "\t$Ra, $Rd, $Rm, $Rn", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
 def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
                         (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
-                        "\t$Ra, $Rd, $Rm, $Rn", []>;
+                        "\t$Ra, $Rd, $Rm, $Rn", []>,
+          Requires<[IsThumb2, HasThumb2DSP]>;
+
+//===----------------------------------------------------------------------===//
+//  Division Instructions.
+//  Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+                 "sdiv", "\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+                 Requires<[HasDivide, IsThumb2]> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-21} = 0b011100;
+  let Inst{20} = 0b1;
+  let Inst{15-12} = 0b1111;
+  let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+                 "udiv", "\t$Rd, $Rn, $Rm",
+                 [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+                 Requires<[HasDivide, IsThumb2]> {
+  let Inst{31-27} = 0b11111;
+  let Inst{26-21} = 0b011101;
+  let Inst{20} = 0b1;
+  let Inst{15-12} = 0b1111;
+  let Inst{7-4} = 0b1111;
+}
 
 //===----------------------------------------------------------------------===//
 //  Misc. Arithmetic Instructions.
@@ -2585,25 +2602,16 @@ def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
 
 def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
                        "rev16", ".w\t$Rd, $Rm",
-                [(set rGPR:$Rd,
-                    (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
-                        (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
-                            (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
-                               (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
+                [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
 
 def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
                        "revsh", ".w\t$Rd, $Rm",
-                 [(set rGPR:$Rd,
-                    (sext_inreg
-                      (or (srl rGPR:$Rm, (i32 8)),
-                          (shl rGPR:$Rm, (i32 8))), i16))]>;
+                 [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
 
-def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
-                            (shl rGPR:$Rm, (i32 8))), i16),
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+                (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
             (t2REVSH rGPR:$Rm)>;
 
-def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
-
 def t2PKHBT : T2ThreeReg<
             (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
                   IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
@@ -2699,33 +2707,21 @@ defm t2TEQ  : T2I_cmp_irs<0b0100, "teq",
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2TwoReg<
-                   (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr,
-                   "mov", ".w\t$Rd, $Rm",
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+                            (ins rGPR:$false, rGPR:$Rm, pred:$p),
+                            4, IIC_iCMOVr,
    [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $Rd"> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b01;
-  let Inst{24-21} = 0b0010;
-  let Inst{20} = 0; // The S bit.
-  let Inst{19-16} = 0b1111; // Rn
-  let Inst{14-12} = 0b000;
-  let Inst{7-4} = 0b0000;
-}
+                RegConstraint<"$false = $Rd">;
 
 let isMoveImm = 1 in
-def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
-                   IIC_iCMOVi, "mov", ".w\t$Rd, $imm",
+def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
+                            (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+                   4, IIC_iCMOVi,
 [/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
-                   RegConstraint<"$false = $Rd"> {
-  let Inst{31-27} = 0b11110;
-  let Inst{25} = 0;
-  let Inst{24-21} = 0b0010;
-  let Inst{20} = 0; // The S bit.
-  let Inst{19-16} = 0b1111; // Rn
-  let Inst{15} = 0;
-}
+                   RegConstraint<"$false = $Rd">;
 
+// FIXME: Pseudo-ize these. For now, just mark codegen only.
+let isCodeGenOnly = 1 in {
 let isMoveImm = 1 in
 def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
                       IIC_iCMOVi,
@@ -2792,6 +2788,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
                              (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
                              IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
                  RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
 } // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
@@ -2826,7 +2823,7 @@ def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
   let Inst{3-0} = 0b1111;
 }
 
-class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
                 InstrItinClass itin, string opc, string asm, string cstr,
                 list<dag> pattern, bits<4> rt2 = 0b1111>
   : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
@@ -2842,7 +2839,7 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let Inst{19-16} = addr;
   let Inst{15-12} = Rt;
 }
-class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
                 InstrItinClass itin, string opc, string asm, string cstr,
                 list<dag> pattern, bits<4> rt2 = 0b1111>
   : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
@@ -2861,16 +2858,15 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 }
 
 let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
-                         Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr",
-                         "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
-                         Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr",
-                         "", []>;
-def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
-                       Size4Bytes, NoItinerary,
-                       "ldrex", "\t$Rt, $addr", "",
-                      []> {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+                         AddrModeNone, 4, NoItinerary,
+                         "ldrexb", "\t$Rt, $addr", "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+                         AddrModeNone, 4, NoItinerary,
+                         "ldrexh", "\t$Rt, $addr", "", []>;
+def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+                       AddrModeNone, 4, NoItinerary,
+                       "ldrex", "\t$Rt, $addr", "", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-20} = 0b0000101;
   let Inst{11-8} = 0b1111;
@@ -2884,7 +2880,7 @@ def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone
 let hasExtraDefRegAllocReq = 1 in
 def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
                          (ins t2addrmode_reg:$addr),
-                         AddrModeNone, Size4Bytes, NoItinerary,
+                         AddrModeNone, 4, NoItinerary,
                          "ldrexd", "\t$Rt, $Rt2, $addr", "",
                          [], {?, ?, ?, ?}> {
   bits<4> Rt2;
@@ -2893,14 +2889,16 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
 }
 
 let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
-                  AddrModeNone, Size4Bytes, NoItinerary,
-                  "strexb", "\t$Rd, $Rt, $addr", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
-                  AddrModeNone, Size4Bytes, NoItinerary,
-                  "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+                         (ins rGPR:$Rt, t2addrmode_reg:$addr),
+                         AddrModeNone, 4, NoItinerary,
+                         "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+                         (ins rGPR:$Rt, t2addrmode_reg:$addr),
+                         AddrModeNone, 4, NoItinerary,
+                         "strexh", "\t$Rd, $Rt, $addr", "", []>;
 def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
-                  AddrModeNone, Size4Bytes, NoItinerary,
+                  AddrModeNone, 4, NoItinerary,
                   "strex", "\t$Rd, $Rt, $addr", "",
                   []> {
   let Inst{31-27} = 0b11101;
@@ -2919,7 +2917,7 @@ def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
 let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
 def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
                          (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
-                         AddrModeNone, Size4Bytes, NoItinerary,
+                         AddrModeNone, 4, NoItinerary,
                          "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
                          {?, ?, ?, ?}> {
   bits<4> Rt2;
@@ -2940,22 +2938,6 @@ def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
 }
 
 //===----------------------------------------------------------------------===//
-// TLS Instructions
-//
-
-// __aeabi_read_tp preserves the registers r1-r3.
-let isCall = 1,
-  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
-  def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
-                     "bl\t__aeabi_read_tp",
-                     [(set R0, ARMthread_pointer)]> {
-    let Inst{31-27} = 0b11110;
-    let Inst{15-14} = 0b11;
-    let Inst{12} = 1;
-  }
-}
-
-//===----------------------------------------------------------------------===//
 // SJLJ Exception handling intrinsics
 //   eh_sjlj_setjmp() is an instruction sequence to store the return
 //   address and save #0 in R0 for the non-longjmp case.
@@ -2973,7 +2955,7 @@ let Defs =
     QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
   hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
   def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
-                               AddrModeNone, SizeSpecial, NoItinerary, "", "",
+                               AddrModeNone, 0, NoItinerary, "", "",
                           [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
                              Requires<[IsThumb2, HasVFP2]>;
 }
@@ -2982,7 +2964,7 @@ let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR, CPSR ],
   hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
   def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
-                               AddrModeNone, SizeSpecial, NoItinerary, "", "",
+                               AddrModeNone, 0, NoItinerary, "", "",
                           [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
                                   Requires<[IsThumb2, NoVFP]>;
 }
@@ -2993,28 +2975,14 @@ let Defs =
 //
 
 // FIXME: remove when we have a way to marking a MI with these properties.
-// FIXME: $dst1 should be a def. But the extra ops must be in the end of the
-// operand list.
 // FIXME: Should pc be an implicit operand like PICADD, etc?
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
-                                        reglist:$regs, variable_ops),
-                        IIC_iLoad_mBr,
-                        "ldmia${p}.w\t$Rn!, $regs",
-                        "$Rn = $wb", []> {
-  bits<4>  Rn;
-  bits<16> regs;
-
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
-  let Inst{24-23} = 0b01;     // Increment After
-  let Inst{22}    = 0;
-  let Inst{21}    = 1;        // Writeback
-  let Inst{20}    = 1;
-  let Inst{19-16} = Rn;
-  let Inst{15-0}  = regs;
-}
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+                                                   reglist:$regs, variable_ops),
+                              4, IIC_iLoad_mBr, [],
+            (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+                         RegConstraint<"$Rn = $wb">;
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
 let isPredicable = 1 in
@@ -3036,17 +3004,17 @@ def t2B   : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
 let isNotDuplicable = 1, isIndirectBranch = 1 in {
 def t2BR_JT : t2PseudoInst<(outs),
           (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
-           SizeSpecial, IIC_Br,
+           0, IIC_Br,
           [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
 
 // FIXME: Add a non-pc based case that can be predicated.
 def t2TBB_JT : t2PseudoInst<(outs),
         (ins GPR:$index, i32imm:$jt, i32imm:$id),
-         SizeSpecial, IIC_Br, []>;
+         0, IIC_Br, []>;
 
 def t2TBH_JT : t2PseudoInst<(outs),
         (ins GPR:$index, i32imm:$jt, i32imm:$id),
-         SizeSpecial, IIC_Br, []>;
+         0, IIC_Br, []>;
 
 def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
                     "tbb", "\t[$Rn, $Rm]", []> {
@@ -3094,11 +3062,22 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
   let Inst{10-0} = target{11-1};
 }
 
+// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+  // Darwin version; only available under the IsThumb2 and IsDarwin predicates.
+  let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+      Uses = [SP] in  // NOTE(review): QQQQ1 is not in Defs; confirm intended
+  def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops),
+                   4, IIC_Br, [],  // empty pattern list
+                   (t2B uncondbrtarget:$dst)>,  // presumably the expansion
+                 Requires<[IsThumb2, IsDarwin]>;
+}
 
 // IT block
 let Defs = [ITSTATE] in
 def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
-                    AddrModeNone, Size2Bytes,  IIC_iALUx,
+                    AddrModeNone, 2,  IIC_iALUx,
                     "it$mask\t$cc", "", []> {
   // 16-bit instruction.
   let Inst{31-16} = 0x0000;
@@ -3178,8 +3157,7 @@ def t2WFE   : T2I_hint<0b00000010, "wfe",   ".w">;
 def t2WFI   : T2I_hint<0b00000011, "wfi",   ".w">;
 def t2SEV   : T2I_hint<0b00000100, "sev",   ".w">;
 
-def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
-                [/* For disassembly only; pattern left blank */]> {
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
   let Inst{31-20} = 0xf3a;
   let Inst{15-14} = 0b10;
   let Inst{12} = 0;
@@ -3347,12 +3325,13 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
 }
 
 //===----------------------------------------------------------------------===//
-// Move between coprocessor and ARM core register -- for disassembly only
+// Move between coprocessor and ARM core register
 //
 
-class t2MovRCopro<string opc, bit direction, dag oops, dag iops,
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
                   list<dag> pattern>
-  : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+  : T2Cop<Op, oops, iops,
+          !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
           pattern> {
   let Inst{27-24} = 0b1110;
   let Inst{20} = direction;
@@ -3373,22 +3352,10 @@ class t2MovRCopro<string opc, bit direction, dag oops, dag iops,
   let Inst{19-16} = CRn;
 }
 
-def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */,
-             (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
-                          c_imm:$CRm, i32imm:$opc2),
-             [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                            imm:$CRm, imm:$opc2)]>;
-def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */,
-             (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
-                                  c_imm:$CRm, i32imm:$opc2), []>;
-
-def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
-                            imm:$CRm, imm:$opc2),
-              (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
-
-class t2MovRRCopro<string opc, bit direction,
-                   list<dag> pattern = [/* For disassembly only */]>
-  : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+                   list<dag> pattern = []>
+  : T2Cop<Op, (outs),
+          (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
           !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
   let Inst{27-24} = 0b1100;
   let Inst{23-21} = 0b010;
@@ -3407,19 +3374,77 @@ class t2MovRRCopro<string opc, bit direction,
   let Inst{3-0}   = CRm;
 }
 
-def t2MCRR2 : t2MovRRCopro<"mcrr2",
-                           0 /* from ARM core register to coprocessor */,
+/* From ARM core register to coprocessor: direction argument 0, no outputs. */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+           (outs),
+           (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                c_imm:$CRm, imm0_7:$opc2),
+           [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+                         imm:$CRm, imm:$opc2)]>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,  // same operand list as t2MCR
+             (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                          c_imm:$CRm, imm0_7:$opc2),
+             [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+                            imm:$CRm, imm:$opc2)]>;
+
+/* From coprocessor to ARM core register: direction argument 1, $Rt output. */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+           (outs GPR:$Rt),
+           (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+           []>;
+// NOTE(review): opc1/opc2 are i32imm here but imm0_7 on t2MCR*; confirm.
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+             (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+                                  c_imm:$CRm, i32imm:$opc2), []>;
+// Both defs have empty inline patterns; the intrinsics are matched below.
+def : T2v6Pat<(int_arm_mrc  imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+              (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+              (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+                        [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+                                       imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
                            [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
                                            GPR:$Rt2, imm:$CRm)]>;
-def t2MRRC2 : t2MovRRCopro<"mrrc2",
-                           1 /* from coprocessor to ARM core register */>;
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
 
 //===----------------------------------------------------------------------===//
-// Other Coprocessor Instructions.  For disassembly only.
+// Other Coprocessor Instructions.
 //
 
-def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
-                   c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+                 c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+                 "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+                 [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+                               imm:$CRm, imm:$opc2)]> {
+  let Inst{27-24} = 0b1110;
+  // NOTE(review): named tCDP while the sibling def is t2CDP2; confirm prefix.
+  bits<4> opc1;
+  bits<4> CRn;
+  bits<4> CRd;
+  bits<4> cop;
+  bits<3> opc2;
+  bits<4> CRm;
+  // Field widths above match the Inst slices below (listed low bits first).
+  let Inst{3-0}   = CRm;
+  let Inst{4}     = 0;     // fixed zero bit between CRm and opc2
+  let Inst{7-5}   = opc2;
+  let Inst{11-8}  = cop;
+  let Inst{15-12} = CRd;
+  let Inst{19-16} = CRn;
+  let Inst{23-20} = opc1;
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+                   c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
                    "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
                    [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
                                   imm:$CRm, imm:$opc2)]> {
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 376bd9607e4b..f1f3cb9c2ecd 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -94,7 +94,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     let Inst{20}    = L_bit;
   }
   def DIA_UPD :
-    AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+    AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+                               variable_ops),
           IndexModeUpd, itin_upd,
           !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
     let Inst{24-23} = 0b01;       // Increment After
@@ -102,7 +103,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     let Inst{20}    = L_bit;
   }
   def DDB_UPD :
-    AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+    AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+                               variable_ops),
           IndexModeUpd, itin_upd,
           !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
     let Inst{24-23} = 0b10;       // Decrement Before
@@ -124,7 +126,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     let D = VFPNeonDomain;
   }
   def SIA_UPD :
-    AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+    AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+                               variable_ops),
           IndexModeUpd, itin_upd,
           !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
     let Inst{24-23} = 0b01;       // Increment After
@@ -136,7 +139,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     let D = VFPNeonDomain;
   }
   def SDB_UPD :
-    AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+    AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+                               variable_ops),
           IndexModeUpd, itin_upd,
           !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
     let Inst{24-23} = 0b10;       // Decrement Before
@@ -162,6 +166,15 @@ defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
 def : MnemonicAlias<"vldm", "vldmia">;
 def : MnemonicAlias<"vstm", "vstmia">;
 
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+                Requires<[HasVFP2]>;  // vstmdb sp!, dpr_reglist form
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+                Requires<[HasVFP2]>;  // vstmdb sp!, spr_reglist form
+def : InstAlias<"vpop${p} $r",  (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+                Requires<[HasVFP2]>;  // vldmia sp!, dpr_reglist form
+def : InstAlias<"vpop${p} $r",  (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+                Requires<[HasVFP2]>;  // vldmia sp!, spr_reglist form
+
 // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
 
 //===----------------------------------------------------------------------===//
@@ -860,7 +873,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
 } // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
 
 //===----------------------------------------------------------------------===//
-// FP FMA Operations.
+// FP Multiply-Accumulate Operations.
 //
 
 def VMLAD : ADbI<0b11100, 0b00, 0, 0,
@@ -977,12 +990,12 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
 
 let neverHasSideEffects = 1 in {
 def VMOVDcc  : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
-                    Size4Bytes, IIC_fpUNA64,
+                    4, IIC_fpUNA64,
                     [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
                  RegConstraint<"$Dn = $Dd">;
 
 def VMOVScc  : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
-                    Size4Bytes, IIC_fpUNA32,
+                    4, IIC_fpUNA32,
                     [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
                  RegConstraint<"$Sn = $Sd">;
 } // neverHasSideEffects
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f4645f15a66f..c6efea1d7806 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -329,13 +329,9 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
       if (NewBase == 0)
         return false;
     }
-    int BaseOpc = !isThumb2
-      ? ARM::ADDri
-      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
+    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
     if (Offset < 0) {
-      BaseOpc = !isThumb2
-        ? ARM::SUBri
-        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
+      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
       Offset = - Offset;
     }
     int ImmedOffset = isThumb2
@@ -516,8 +512,6 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
   if (!MI)
     return false;
   if (MI->getOpcode() != ARM::t2SUBri &&
-      MI->getOpcode() != ARM::t2SUBrSPi &&
-      MI->getOpcode() != ARM::t2SUBrSPi12 &&
       MI->getOpcode() != ARM::tSUBspi &&
       MI->getOpcode() != ARM::SUBri)
     return false;
@@ -541,8 +535,6 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
   if (!MI)
     return false;
   if (MI->getOpcode() != ARM::t2ADDri &&
-      MI->getOpcode() != ARM::t2ADDrSPi &&
-      MI->getOpcode() != ARM::t2ADDrSPi12 &&
       MI->getOpcode() != ARM::tADDspi &&
       MI->getOpcode() != ARM::ADDri)
     return false;
@@ -1461,19 +1453,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
   while (++I != E) {
     if (I->isDebugValue() || MemOps.count(&*I))
       continue;
-    const TargetInstrDesc &TID = I->getDesc();
-    if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
+    const MCInstrDesc &MCID = I->getDesc();
+    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
       return false;
-    if (isLd && TID.mayStore())
+    if (isLd && MCID.mayStore())
       return false;
     if (!isLd) {
-      if (TID.mayLoad())
+      if (MCID.mayLoad())
         return false;
       // It's not safe to move the first 'str' down.
       // str r1, [r0]
       // strh r5, [r0]
       // str r4, [r0, #+4]
-      if (TID.mayStore())
+      if (MCID.mayStore())
         return false;
     }
     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1672,14 +1664,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
           Ops.pop_back();
           Ops.pop_back();
 
-          const TargetInstrDesc &TID = TII->get(NewOpc);
-          const TargetRegisterClass *TRC = TID.OpInfo[0].getRegClass(TRI);
+          const MCInstrDesc &MCID = TII->get(NewOpc);
+          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
           MRI->constrainRegClass(EvenReg, TRC);
           MRI->constrainRegClass(OddReg, TRC);
 
           // Form the pair instruction.
           if (isLd) {
-            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID)
+            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
               .addReg(EvenReg, RegState::Define)
               .addReg(OddReg, RegState::Define)
               .addReg(BaseReg);
@@ -1691,7 +1683,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
             ++NumLDRDFormed;
           } else {
-            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID)
+            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
               .addReg(EvenReg)
               .addReg(OddReg)
               .addReg(BaseReg);
@@ -1742,8 +1734,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
   while (MBBI != E) {
     for (; MBBI != E; ++MBBI) {
       MachineInstr *MI = MBBI;
-      const TargetInstrDesc &TID = MI->getDesc();
-      if (TID.isCall() || TID.isTerminator()) {
+      const MCInstrDesc &MCID = MI->getDesc();
+      if (MCID.isCall() || MCID.isTerminator()) {
         // Stop at barriers.
         ++MBBI;
         break;
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp
index c5f727d60642..39be3f0e39f8 100644
--- a/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -21,8 +21,11 @@
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/raw_ostream.h"
+
 using namespace llvm;
 
 STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
@@ -32,19 +35,30 @@ namespace {
 class ARMMCCodeEmitter : public MCCodeEmitter {
   ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
   void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
-  const TargetMachine &TM;
-  const TargetInstrInfo &TII;
-  const ARMSubtarget *Subtarget;
-  MCContext &Ctx;
+  const MCInstrInfo &MCII;
+  const MCSubtargetInfo &STI;
 
 public:
-  ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
-    : TM(tm), TII(*TM.getInstrInfo()),
-      Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) {
+  ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+                   MCContext &ctx)
+    : MCII(mcii), STI(sti) {
   }
 
   ~ARMMCCodeEmitter() {}
 
+  bool isThumb() const { // true when ARM::ModeThumb is set in STI's features
+    // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+  }
+  bool isThumb2() const { // isThumb() and ARM::FeatureThumb2 both hold
+    return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+  }
+  bool isTargetDarwin() const { // true when the OS is Darwin, MacOSX or IOS
+    Triple TT(STI.getTargetTriple()); // Triple is rebuilt on every call
+    Triple::OSType OS = TT.getOS();
+    return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+  }
+
   unsigned getMachineSoImmOpValue(unsigned SoImm) const;
 
   // getBinaryCodeForInstr - TableGen'erated function for getting the
@@ -320,9 +334,10 @@ public:
 
 } // end anonymous namespace
 
-MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+                                            const MCSubtargetInfo &STI,
                                             MCContext &Ctx) {
-  return new ARMMCCodeEmitter(TM, Ctx);
+  return new ARMMCCodeEmitter(MCII, STI, Ctx);
 }
 
 /// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
@@ -330,7 +345,7 @@ MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
 /// Thumb2 mode.
 unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
                                                  unsigned EncodedValue) const {
-  if (Subtarget->isThumb2()) {
+  if (isThumb2()) {
     // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved
     // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
     // set to 1111.
@@ -349,7 +364,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
 /// Thumb2 mode.
 unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
                                                  unsigned EncodedValue) const {
-  if (Subtarget->isThumb2()) {
+  if (isThumb2()) {
     EncodedValue &= 0xF0FFFFFF;
     EncodedValue |= 0x09000000;
   }
@@ -362,7 +377,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
 /// Thumb2 mode.
 unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
                                                  unsigned EncodedValue) const {
-  if (Subtarget->isThumb2()) {
+  if (isThumb2()) {
     EncodedValue &= 0x00FFFFFF;
     EncodedValue |= 0xEE000000;
   }
@@ -374,7 +389,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
 /// them to their Thumb2 form if we are currently in Thumb2 mode.
 unsigned ARMMCCodeEmitter::
 VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
-  if (Subtarget->isThumb2()) {
+  if (isThumb2()) {
     EncodedValue &= 0x0FFFFFFF;
     EncodedValue |= 0xE0000000;
   }
@@ -515,7 +530,7 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
                        SmallVectorImpl<MCFixup> &Fixups) const {
   // FIXME: This really, really shouldn't use TargetMachine. We don't want
   // coupling between MC and TM anywhere we can help it.
-  if (Subtarget->isThumb2())
+  if (isThumb2())
     return
       ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
   return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
@@ -624,7 +639,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
     const MCExpr *Expr = MO.getExpr();
 
     MCFixupKind Kind;
-    if (Subtarget->isThumb2())
+    if (isThumb2())
       Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
     else
       Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
@@ -709,22 +724,22 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
     switch (ARM16Expr->getKind()) {
     default: assert(0 && "Unsupported ARMFixup");
     case ARMMCExpr::VK_ARM_HI16:
-      if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
-        Kind = MCFixupKind(Subtarget->isThumb2()
+      if (!isTargetDarwin() && EvaluateAsPCRel(E))
+        Kind = MCFixupKind(isThumb2()
                            ? ARM::fixup_t2_movt_hi16_pcrel
                            : ARM::fixup_arm_movt_hi16_pcrel);
       else
-        Kind = MCFixupKind(Subtarget->isThumb2()
+        Kind = MCFixupKind(isThumb2()
                            ? ARM::fixup_t2_movt_hi16
                            : ARM::fixup_arm_movt_hi16);
       break;
     case ARMMCExpr::VK_ARM_LO16:
-      if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
-        Kind = MCFixupKind(Subtarget->isThumb2()
+      if (!isTargetDarwin() && EvaluateAsPCRel(E))
+        Kind = MCFixupKind(isThumb2()
                            ? ARM::fixup_t2_movw_lo16_pcrel
                            : ARM::fixup_arm_movw_lo16_pcrel);
       else
-        Kind = MCFixupKind(Subtarget->isThumb2()
+        Kind = MCFixupKind(isThumb2()
                            ? ARM::fixup_t2_movw_lo16
                            : ARM::fixup_arm_movw_lo16);
       break;
@@ -898,7 +913,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
     assert(MO.isExpr() && "Unexpected machine operand type!");
     const MCExpr *Expr = MO.getExpr();
     MCFixupKind Kind;
-    if (Subtarget->isThumb2())
+    if (isThumb2())
       Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
     else
       Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
@@ -1274,21 +1289,21 @@ void ARMMCCodeEmitter::
 EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                   SmallVectorImpl<MCFixup> &Fixups) const {
   // Pseudo instructions don't get encoded.
-  const TargetInstrDesc &Desc = TII.get(MI.getOpcode());
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
   uint64_t TSFlags = Desc.TSFlags;
   if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
     return;
+
   int Size;
-  // Basic size info comes from the TSFlags field.
-  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
-  default: llvm_unreachable("Unexpected instruction size!");
-  case ARMII::Size2Bytes: Size = 2; break;
-  case ARMII::Size4Bytes: Size = 4; break;
-  }
+  if (Desc.getSize() == 2 || Desc.getSize() == 4)
+    Size = Desc.getSize();
+  else
+    llvm_unreachable("Unexpected instruction size!");
+  
   uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
   // Thumb 32-bit wide instructions need to emit the high order halfword
   // first.
-  if (Subtarget->isThumb() && Size == 4) {
+  if (isThumb() && Size == 4) {
     EmitConstant(Binary >> 16, 2, OS);
     EmitConstant(Binary & 0xffff, 2, OS);
   } else
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 59d60506fc0f..7411b599f0fa 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -23,43 +23,94 @@
 using namespace llvm;
 
 
-static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
-                              ARMAsmPrinter &Printer) {
-  MCContext &Ctx = Printer.OutContext;
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+                                      const MCSymbol *Symbol) {
   const MCExpr *Expr;
   switch (MO.getTargetFlags()) {
   default: {
-    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+                                   OutContext);
     switch (MO.getTargetFlags()) {
     default:
       assert(0 && "Unknown target flag on symbol operand");
     case 0:
       break;
     case ARMII::MO_LO16:
-      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
-      Expr = ARMMCExpr::CreateLower16(Expr, Ctx);
+      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+                                     OutContext);
+      Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
       break;
     case ARMII::MO_HI16:
-      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
-      Expr = ARMMCExpr::CreateUpper16(Expr, Ctx);
+      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+                                     OutContext);
+      Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
       break;
     }
     break;
   }
 
   case ARMII::MO_PLT:
-    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT,
+                                   OutContext);
     break;
   }
 
   if (!MO.isJTI() && MO.getOffset())
     Expr = MCBinaryExpr::CreateAdd(Expr,
-                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
-                                   Ctx);
+                                   MCConstantExpr::Create(MO.getOffset(),
+                                                          OutContext),
+                                   OutContext);
   return MCOperand::CreateExpr(Expr);
 
 }
 
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+                                 MCOperand &MCOp) {
+  switch (MO.getType()) {
+  default:
+    assert(0 && "unknown operand type");
+    return false;
+  case MachineOperand::MO_Register:
+    // Ignore all non-CPSR implicit register operands.
+    if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+      return false;
+    assert(!MO.getSubReg() && "Subregs should be eliminated!");
+    MCOp = MCOperand::CreateReg(MO.getReg());
+    break;
+  case MachineOperand::MO_Immediate:
+    MCOp = MCOperand::CreateImm(MO.getImm());
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+        MO.getMBB()->getSymbol(), OutContext));
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+   MCOp = GetSymbolRef(MO,
+                        GetExternalSymbolSymbol(MO.getSymbolName()));
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+    break;
+  case MachineOperand::MO_BlockAddress:
+    MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+    break;
+  case MachineOperand::MO_FPImmediate: {
+    APFloat Val = MO.getFPImm()->getValueAPF();
+    bool ignored;
+    Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+    MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+    break;
+  }
+  }
+  return true;
+}
+
 void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                         ARMAsmPrinter &AP) {
   OutMI.setOpcode(MI->getOpcode());
@@ -68,48 +119,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
     const MachineOperand &MO = MI->getOperand(i);
 
     MCOperand MCOp;
-    switch (MO.getType()) {
-    default:
-      MI->dump();
-      assert(0 && "unknown operand type");
-    case MachineOperand::MO_Register:
-      // Ignore all non-CPSR implicit register operands.
-      if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue;
-      assert(!MO.getSubReg() && "Subregs should be eliminated!");
-      MCOp = MCOperand::CreateReg(MO.getReg());
-      break;
-    case MachineOperand::MO_Immediate:
-      MCOp = MCOperand::CreateImm(MO.getImm());
-      break;
-    case MachineOperand::MO_MachineBasicBlock:
-      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                       MO.getMBB()->getSymbol(), AP.OutContext));
-      break;
-    case MachineOperand::MO_GlobalAddress:
-      MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
-      break;
-    case MachineOperand::MO_ExternalSymbol:
-      MCOp = GetSymbolRef(MO,
-                          AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
-      break;
-    case MachineOperand::MO_JumpTableIndex:
-      MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
-      break;
-    case MachineOperand::MO_ConstantPoolIndex:
-      MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
-      break;
-    case MachineOperand::MO_BlockAddress:
-      MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
-      break;
-    case MachineOperand::MO_FPImmediate: {
-      APFloat Val = MO.getFPImm()->getValueAPF();
-      bool ignored;
-      Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
-      MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
-      break;
-    }
-    }
-
-    OutMI.addOperand(MCOp);
+    if (AP.lowerOperand(MO, MCOp))
+      OutMI.addOperand(MCOp);
   }
 }
diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/ARMMachObjectWriter.cpp
new file mode 100644
index 000000000000..a36e47da06d4
--- /dev/null
+++ b/lib/Target/ARM/ARMMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+  void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+                                    const MCAssembler &Asm,
+                                    const MCAsmLayout &Layout,
+                                    const MCFragment *Fragment,
+                                    const MCFixup &Fixup,
+                                    MCValue Target,
+                                    unsigned Log2Size,
+                                    uint64_t &FixedValue);
+  void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+                                   const MCAssembler &Asm,
+                                   const MCAsmLayout &Layout,
+                                   const MCFragment *Fragment,
+                                   const MCFixup &Fixup, MCValue Target,
+                                   uint64_t &FixedValue);
+
+public:
+  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+                      uint32_t CPUSubtype)
+    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+                               /*UseAggressiveSymbolFolding=*/true) {}
+
+  void RecordRelocation(MachObjectWriter *Writer,
+                        const MCAssembler &Asm, const MCAsmLayout &Layout,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
+                        MCValue Target, uint64_t &FixedValue);
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+                              unsigned &Log2Size) {
+  RelocType = unsigned(macho::RIT_Vanilla);
+  Log2Size = ~0U;
+
+  switch (Kind) {
+  default:
+    return false;
+
+  case FK_Data_1:
+    Log2Size = llvm::Log2_32(1);
+    return true;
+  case FK_Data_2:
+    Log2Size = llvm::Log2_32(2);
+    return true;
+  case FK_Data_4:
+    Log2Size = llvm::Log2_32(4);
+    return true;
+  case FK_Data_8:
+    Log2Size = llvm::Log2_32(8);
+    return true;
+
+    // Handle 24-bit branch kinds and pc-relative fixups sharing that type.
+  case ARM::fixup_arm_ldst_pcrel_12:
+  case ARM::fixup_arm_pcrel_10:
+  case ARM::fixup_arm_adr_pcrel_12:
+  case ARM::fixup_arm_condbranch:
+  case ARM::fixup_arm_uncondbranch:
+    RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+    // Report as 'long', even though that is not quite accurate.
+    Log2Size = llvm::Log2_32(4);
+    return true;
+
+    // Handle Thumb branches.
+  case ARM::fixup_arm_thumb_br:
+    RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+    Log2Size = llvm::Log2_32(2);
+    return true;
+
+  case ARM::fixup_t2_uncondbranch:
+  case ARM::fixup_arm_thumb_bl:
+  case ARM::fixup_arm_thumb_blx:
+    RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+    Log2Size = llvm::Log2_32(4);
+    return true;
+
+  case ARM::fixup_arm_movt_hi16:
+  case ARM::fixup_arm_movt_hi16_pcrel:
+  case ARM::fixup_t2_movt_hi16:
+  case ARM::fixup_t2_movt_hi16_pcrel:
+    RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+    // Report as 'long', even though that is not quite accurate.
+    Log2Size = llvm::Log2_32(4);
+    return true;
+
+  case ARM::fixup_arm_movw_lo16:
+  case ARM::fixup_arm_movw_lo16_pcrel:
+  case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_movw_lo16_pcrel:
+    RelocType = unsigned(macho::RIT_ARM_Half);
+    // Report as 'long', even though that is not quite accurate.
+    Log2Size = llvm::Log2_32(4);
+    return true;
+  }
+}
+
+void ARMMachObjectWriter::
+RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+                            const MCAssembler &Asm,
+                            const MCAsmLayout &Layout,
+                            const MCFragment *Fragment,
+                            const MCFixup &Fixup,
+                            MCValue Target,
+                            uint64_t &FixedValue) {
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Type = macho::RIT_ARM_Half;
+
+  // See <reloc.h>.
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+  if (!A_SD->getFragment())
+    report_fatal_error("symbol '" + A->getName() +
+                       "' can not be undefined in a subtraction expression");
+
+  uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+  uint32_t Value2 = 0;
+  uint64_t SecAddr =
+    Writer->getSectionAddress(A_SD->getFragment()->getParent());
+  FixedValue += SecAddr;
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+    if (!B_SD->getFragment())
+      report_fatal_error("symbol '" + B->getSymbol().getName() +
+                         "' can not be undefined in a subtraction expression");
+
+    // Select the appropriate difference relocation type.
+    Type = macho::RIT_ARM_HalfDifference;
+    Value2 = Writer->getSymbolAddress(B_SD, Layout);
+    FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+  }
+
+  // Relocations are written out in reverse order, so the PAIR comes first.
+  // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+  //
+  // For these two r_type relocations they always have a pair following them and
+  // the r_length bits are used differently.  The encoding of the r_length is as
+  // follows:
+  //   low bit of r_length:
+  //      0 - :lower16: for movw instructions
+  //      1 - :upper16: for movt instructions
+  //   high bit of r_length:
+  //      0 - arm instructions
+  //      1 - thumb instructions
+  // the other half of the relocated expression is in the following pair
+  // relocation entry in the low 16 bits of the r_address field.
+  unsigned ThumbBit = 0;
+  unsigned MovtBit = 0;
+  switch ((unsigned)Fixup.getKind()) {
+  default: break;
+  case ARM::fixup_arm_movt_hi16:
+  case ARM::fixup_arm_movt_hi16_pcrel:
+    MovtBit = 1;
+    break;
+  case ARM::fixup_t2_movt_hi16:
+  case ARM::fixup_t2_movt_hi16_pcrel:
+    MovtBit = 1;
+    // Fallthrough
+  case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_movw_lo16_pcrel:
+    ThumbBit = 1;
+    break;
+  }
+
+
+  if (Type == macho::RIT_ARM_HalfDifference) {
+    uint32_t OtherHalf = MovtBit
+      ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((OtherHalf       <<  0) |
+                 (macho::RIT_Pair << 24) |
+                 (MovtBit         << 28) |
+                 (ThumbBit        << 29) |
+                 (IsPCRel         << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value2;
+    Writer->addRelocation(Fragment->getParent(), MRE);
+  }
+
+  macho::RelocationEntry MRE;
+  MRE.Word0 = ((FixupOffset <<  0) |
+               (Type        << 24) |
+               (MovtBit     << 28) |
+               (ThumbBit    << 29) |
+               (IsPCRel     << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value;
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+                                                    const MCAssembler &Asm,
+                                                    const MCAsmLayout &Layout,
+                                                    const MCFragment *Fragment,
+                                                    const MCFixup &Fixup,
+                                                    MCValue Target,
+                                                    unsigned Log2Size,
+                                                    uint64_t &FixedValue) {
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Type = macho::RIT_Vanilla;
+
+  // See <reloc.h>.
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+  if (!A_SD->getFragment())
+    report_fatal_error("symbol '" + A->getName() +
+                       "' can not be undefined in a subtraction expression");
+
+  uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+  uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+  FixedValue += SecAddr;
+  uint32_t Value2 = 0;
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+    if (!B_SD->getFragment())
+      report_fatal_error("symbol '" + B->getSymbol().getName() +
+                         "' can not be undefined in a subtraction expression");
+
+    // Select the appropriate difference relocation type.
+    Type = macho::RIT_Difference;
+    Value2 = Writer->getSymbolAddress(B_SD, Layout);
+    FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+  }
+
+  // Relocations are written out in reverse order, so the PAIR comes first.
+  if (Type == macho::RIT_Difference ||
+      Type == macho::RIT_Generic_LocalDifference) {
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((0         <<  0) |
+                 (macho::RIT_Pair  << 24) |
+                 (Log2Size  << 28) |
+                 (IsPCRel   << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value2;
+    Writer->addRelocation(Fragment->getParent(), MRE);
+  }
+
+  macho::RelocationEntry MRE;
+  MRE.Word0 = ((FixupOffset <<  0) |
+               (Type        << 24) |
+               (Log2Size    << 28) |
+               (IsPCRel     << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value;
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+                                           const MCAssembler &Asm,
+                                           const MCAsmLayout &Layout,
+                                           const MCFragment *Fragment,
+                                           const MCFixup &Fixup,
+                                           MCValue Target,
+                                           uint64_t &FixedValue) {
+  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Log2Size;
+  unsigned RelocType = macho::RIT_Vanilla;
+  if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+    report_fatal_error("unknown ARM fixup kind!");
+    return;
+  }
+
+  // If this is a difference or a defined symbol plus an offset, then we need a
+  // scattered relocation entry.  Differences always require scattered
+  // relocations.
+  if (Target.getSymB()) {
+    if (RelocType == macho::RIT_ARM_Half ||
+        RelocType == macho::RIT_ARM_HalfDifference)
+      return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
+                                         Target, FixedValue);
+    return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+                                        Target, Log2Size, FixedValue);
+  }
+
+  // Get the symbol data, if any.
+  MCSymbolData *SD = 0;
+  if (Target.getSymA())
+    SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+  // FIXME: For other platforms, we need to use scattered relocations for
+  // internal relocations with offsets.  If this is an internal relocation with
+  // an offset, it also needs a scattered relocation entry.
+  //
+  // Is this right for ARM?
+  uint32_t Offset = Target.getConstant();
+  if (IsPCRel && RelocType == macho::RIT_Vanilla)
+    Offset += 1 << Log2Size;
+  if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+    return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+                                        Target, Log2Size, FixedValue);
+
+  // See <reloc.h>.
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned Index = 0;
+  unsigned IsExtern = 0;
+  unsigned Type = 0;
+
+  if (Target.isAbsolute()) { // constant
+    // FIXME!
+    report_fatal_error("FIXME: relocations to absolute targets "
+                       "not yet implemented");
+  } else {
+    // Resolve constant variables.
+    if (SD->getSymbol().isVariable()) {
+      int64_t Res;
+      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+            Res, Layout, Writer->getSectionAddressMap())) {
+        FixedValue = Res;
+        return;
+      }
+    }
+
+    // Check whether we need an external or internal relocation.
+    if (Writer->doesSymbolRequireExternRelocation(SD)) {
+      IsExtern = 1;
+      Index = SD->getIndex();
+
+      // For external relocations, make sure to offset the fixup value to
+      // compensate for the addend of the symbol address, if it was
+      // defined. This occurs with weak definitions, for example.
+      if (!SD->Symbol->isUndefined())
+        FixedValue -= Layout.getSymbolOffset(SD);
+    } else {
+      // The index is the section ordinal (1-based).
+      const MCSectionData &SymSD = Asm.getSectionData(
+        SD->getSymbol().getSection());
+      Index = SymSD.getOrdinal() + 1;
+      FixedValue += Writer->getSectionAddress(&SymSD);
+    }
+    if (IsPCRel)
+      FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+    // The type is determined by the fixup kind.
+    Type = RelocType;
+  }
+
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = FixupOffset;
+  MRE.Word1 = ((Index     <<  0) |
+               (IsPCRel   << 24) |
+               (Log2Size  << 25) |
+               (IsExtern  << 27) |
+               (Type      << 28));
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+                                                bool Is64Bit,
+                                                uint32_t CPUType,
+                                                uint32_t CPUSubtype) {
+  return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+                                                        CPUType,
+                                                        CPUSubtype),
+                                OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 99418733c376..76eb496bde42 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -200,45 +200,16 @@ def FPEXC   : ARMReg<8, "fpexc">;
 // r11 == Frame Pointer (arm-style backtraces)
 // r10 == Stack Limit
 //
-def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
-                                           R7, R8, R9, R10, R11, R12,
-                                           SP, LR, PC]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned ARM_GPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R12,ARM::LR,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
-      ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
-
-    // For Thumb1 mode, we don't want to allocate hi regs at all, as we
-    // don't know how to spill them. If we make our prologue/epilogue code
-    // smarter at some point, we can go back to using the above allocation
-    // orders for the Thumb1 instructions that know how to use hi regs.
-    static const unsigned THUMB_GPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
-    GPRClass::iterator
-    GPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO;
-      return ARM_GPR_AO;
-    }
-
-    GPRClass::iterator
-    GPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
-      return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
-    }
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+                                               SP, LR, PC)> {
+  // Allocate LR as the first CSR since it is always saved anyway.
+  // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+  // know how to spill them. If we make our prologue/epilogue code smarter at
+  // some point, we can go back to using the above allocation orders for the
+  // Thumb1 instructions that know how to use hi regs.
+  let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
   }];
 }
 
@@ -246,263 +217,98 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
 // register range for operands, but have undefined behaviours when PC
 // or SP (R13 or R15) are used. The ARM ISA refers to these operands
 // via the BadReg() pseudo-code description.
-def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
-                                            R7, R8, R9, R10, R11, R12, LR]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned ARM_rGPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R12,ARM::LR,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
-      ARM::R8, ARM::R9, ARM::R10,
-      ARM::R11 };
-
-    // For Thumb1 mode, we don't want to allocate hi regs at all, as we
-    // don't know how to spill them. If we make our prologue/epilogue code
-    // smarter at some point, we can go back to using the above allocation
-    // orders for the Thumb1 instructions that know how to use hi regs.
-    static const unsigned THUMB_rGPR_AO[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
-    rGPRClass::iterator
-    rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_rGPR_AO;
-      return ARM_rGPR_AO;
-    }
-
-    rGPRClass::iterator
-    rGPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
-      if (Subtarget.isThumb1Only())
-        return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
-      return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
-    }
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+  let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+  let AltOrderSelect = [{
+      return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
   }];
 }
 
 // Thumb registers are R0-R7 normally. Some instructions can still use
 // the general GPR register class above (MOV, e.g.)
-def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
 
 // For tail calls, we can't use callee-saved registers, as they are restored
 // to the saved value before the tail call, which would clobber a call address.
 // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
 // this class and the preceding one(!)  This is what we want.
-def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // R9 is available.
-    static const unsigned ARM_GPR_R9_TC[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R9, ARM::R12 };
-    // R9 is not available.
-    static const unsigned ARM_GPR_NOR9_TC[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
-      ARM::R12 };
-
-    // For Thumb1 mode, we don't want to allocate hi regs at all, as we
-    // don't know how to spill them. If we make our prologue/epilogue code
-    // smarter at some point, we can go back to using the above allocation
-    // orders for the Thumb1 instructions that know how to use hi regs.
-    static const unsigned THUMB_GPR_AO_TC[] = {
-      ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-    tcGPRClass::iterator
-    tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO_TC;
-      return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
-    }
-
-    tcGPRClass::iterator
-    tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
-      if (Subtarget.isThumb1Only())
-        return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
-
-      return Subtarget.isTargetDarwin() ?
-        ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
-        ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
-    }
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+  let AltOrders = [(and tcGPR, tGPR)];
+  let AltOrderSelect = [{
+      return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
   }];
 }
 
-
 // Scalar single precision floating point register class..
-def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
-  S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
-  S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
 
 // Subset of SPR which can be used as a source of NEON scalars for 16-bit
 // operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32,
-                          [S0, S1,  S2,  S3,  S4,  S5,  S6,  S7,
-                           S8, S9, S10, S11, S12, S13, S14, S15]>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
 
 // Scalar double precision floating point / generic 64-bit vector register
 // class.
 // ARM requires only word alignment for double. It's more performant if it
 // is double-word alignment though.
 def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
-                        [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
-                         D8,  D9,  D10, D11, D12, D13, D14, D15,
-                         D16, D17, D18, D19, D20, D21, D22, D23,
-                         D24, D25, D26, D27, D28, D29, D30, D31]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // VFP2 / VFPv3-D16
-    static const unsigned ARM_DPR_VFP2[] = {
-      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
-      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
-      ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
-      ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-    // VFP3: D8-D15 are callee saved and should be allocated last.
-    // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
-    static const unsigned ARM_DPR_VFP3[] = {
-      ARM::D16, ARM::D17, ARM::D18, ARM::D19,
-      ARM::D20, ARM::D21, ARM::D22, ARM::D23,
-      ARM::D24, ARM::D25, ARM::D26, ARM::D27,
-      ARM::D28, ARM::D29, ARM::D30, ARM::D31,
-      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
-      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
-      ARM::D8,  ARM::D9,  ARM::D10, ARM::D11,
-      ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-
-    DPRClass::iterator
-    DPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
-        return ARM_DPR_VFP3;
-      return ARM_DPR_VFP2;
-    }
-
-    DPRClass::iterator
-    DPRClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
-        return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
-      else
-        return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
-    }
-  }];
+                        (sequence "D%u", 0, 31)> {
+  // Allocate non-VFP2 registers D16-D31 first.
+  let AltOrders = [(rotl DPR, 16)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Subset of DPR that are accessible with VFP2 (and so that also have
 // 32-bit SPR subregs).
 def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
-                             [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
-                              D8,  D9,  D10, D11, D12, D13, D14, D15]> {
+                             (trunc DPR, 16)> {
   let SubRegClasses = [(SPR ssub_0, ssub_1)];
 }
 
 // Subset of DPR which can be used as a source of NEON scalars for 16-bit
 // operations
 def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
-                          [D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7]> {
+                          (trunc DPR, 8)> {
   let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
 }
 
 // Generic 128-bit vector register class.
 def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
-                        [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7,
-                         Q8,  Q9,  Q10, Q11, Q12, Q13, Q14, Q15]> {
+                        (sequence "Q%u", 0, 15)> {
   let SubRegClasses = [(DPR dsub_0, dsub_1)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // Q4-Q7 are callee saved and should be allocated last.
-    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
-    static const unsigned ARM_QPR[] = {
-      ARM::Q8,  ARM::Q9,  ARM::Q10, ARM::Q11,
-      ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
-      ARM::Q0,  ARM::Q1,  ARM::Q2,  ARM::Q3,
-      ARM::Q4,  ARM::Q5,  ARM::Q6,  ARM::Q7 };
-
-    QPRClass::iterator
-    QPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      return ARM_QPR;
-    }
-
-    QPRClass::iterator
-    QPRClass::allocation_order_end(const MachineFunction &MF) const {
-      return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 aliases Q8-Q15 first.
+  let AltOrders = [(rotl QPR, 8)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Subset of QPR that have 32-bit SPR subregs.
 def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-                             128,
-                             [Q0,  Q1,  Q2,  Q3,  Q4,  Q5,  Q6,  Q7]> {
+                             128, (trunc QPR, 8)> {
   let SubRegClasses = [(SPR      ssub_0, ssub_1, ssub_2, ssub_3),
                        (DPR_VFP2 dsub_0, dsub_1)];
 }
 
 // Subset of QPR that have DPR_8 and SPR_8 subregs.
 def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-                           128,
-                           [Q0,  Q1,  Q2,  Q3]> {
+                           128, (trunc QPR, 4)> {
   let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
                        (DPR_8 dsub_0, dsub_1)];
 }
 
 // Pseudo 256-bit vector register class to model pairs of Q registers
 // (4 consecutive D registers).
-def QQPR : RegisterClass<"ARM", [v4i64],
-                         256,
-                         [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
                        (QPR qsub_0, qsub_1)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // QQ2-QQ3 are callee saved and should be allocated last.
-    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
-    static const unsigned ARM_QQPR[] = {
-      ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
-      ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
-
-    QQPRClass::iterator
-    QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      return ARM_QQPR;
-    }
-
-    QQPRClass::iterator
-    QQPRClass::allocation_order_end(const MachineFunction &MF) const {
-      return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 aliases first.
+  let AltOrders = [(rotl QQPR, 4)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Subset of QQPR that have 32-bit SPR subregs.
-def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
-                              256,
-                              [QQ0, QQ1, QQ2, QQ3]> {
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
   let SubRegClasses = [(SPR      ssub_0, ssub_1, ssub_2, ssub_3),
                        (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
                        (QPR_VFP2 qsub_0, qsub_1)];
@@ -511,35 +317,16 @@ def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
 
 // Pseudo 512-bit vector register class to model 4 consecutive Q registers
 // (8 consecutive D registers).
-def QQQQPR : RegisterClass<"ARM", [v8i64],
-                         256,
-                         [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> {
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
                             dsub_4, dsub_5, dsub_6, dsub_7),
                        (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // QQQQ1 is callee saved and should be allocated last.
-    // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
-    static const unsigned ARM_QQQQPR[] = {
-      ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
-
-    QQQQPRClass::iterator
-    QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
-      return ARM_QQQQPR;
-    }
-
-    QQQQPRClass::iterator
-    QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
-      return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
-    }
-  }];
+  // Allocate non-VFP2 aliases first.
+  let AltOrders = [(rotl QQQQPR, 2)];
+  let AltOrderSelect = [{ return 1; }];
 }
 
 // Condition code registers.
-def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]> {
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
   let isAllocatable = 0;
 }
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index c6f266b07531..1cab9e44ce75 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -7,17 +7,21 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the ARM specific subclass of TargetSubtarget.
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "ARMSubtarget.h"
-#include "ARMGenSubtarget.inc"
 #include "ARMBaseRegisterInfo.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
 using namespace llvm;
 
 static cl::opt<bool>
@@ -31,17 +35,25 @@ static cl::opt<bool>
 StrictAlign("arm-strict-align", cl::Hidden,
             cl::desc("Disallow all unaligned memory accesses"));
 
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
-                           bool isT)
-  : ARMArchVersion(V4)
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS)
+  : ARMGenSubtargetInfo(TT, CPU, FS)
   , ARMProcFamily(Others)
-  , ARMFPUType(None)
+  , HasV4TOps(false)
+  , HasV5TOps(false)
+  , HasV5TEOps(false)
+  , HasV6Ops(false)
+  , HasV6T2Ops(false)
+  , HasV7Ops(false)
+  , HasVFPv2(false)
+  , HasVFPv3(false)
+  , HasNEON(false)
   , UseNEONForSinglePrecisionFP(false)
   , SlowFPVMLx(false)
   , HasVMLxForwarding(false)
   , SlowFPBrcc(false)
-  , IsThumb(isT)
-  , ThumbMode(Thumb1)
+  , InThumbMode(false)
+  , HasThumb2(false)
   , NoARM(false)
   , PostRAScheduler(false)
   , IsR9Reserved(ReserveR9)
@@ -56,94 +68,40 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , HasMPExtension(false)
   , FPOnlySP(false)
   , AllowsUnalignedMem(false)
+  , Thumb2DSP(false)
   , stackAlignment(4)
-  , CPUString("generic")
+  , CPUString(CPU)
   , TargetTriple(TT)
   , TargetABI(ARM_ABI_APCS) {
-  // Default to soft float ABI
-  if (FloatABIType == FloatABI::Default)
-    FloatABIType = FloatABI::Soft;
-
   // Determine default and user specified characteristics
-
-  // When no arch is specified either by CPU or by attributes, make the default
-  // ARMv4T.
-  const char *ARMArchFeature = "";
-  if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
-    ARMArchVersion = V4T;
-    ARMArchFeature = ",+v4t";
+  if (CPUString.empty())
+    CPUString = "generic";
+
+  // Insert the architecture feature derived from the target triple into the
+  // feature string. This is important for setting features that are implied
+  // based on the architecture version.
+  std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+  if (!FS.empty()) {
+    if (!ArchFS.empty())
+      ArchFS = ArchFS + "," + FS;
+    else
+      ArchFS = FS;
   }
+  ParseSubtargetFeatures(CPUString, ArchFS);
 
-  // Set the boolean corresponding to the current target triple, or the default
-  // if one cannot be determined, to true.
-  unsigned Len = TT.length();
-  unsigned Idx = 0;
+  // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify an
+  // ARM version or CPU and then remove this.
+  if (!HasV6T2Ops && hasThumb2())
+    HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
 
-  if (Len >= 5 && TT.substr(0, 4) == "armv")
-    Idx = 4;
-  else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
-    IsThumb = true;
-    if (Len >= 7 && TT[5] == 'v')
-      Idx = 6;
-  }
-  if (Idx) {
-    unsigned SubVer = TT[Idx];
-    if (SubVer >= '7' && SubVer <= '9') {
-      ARMArchVersion = V7A;
-      ARMArchFeature = ",+v7a";
-      if (Len >= Idx+2 && TT[Idx+1] == 'm') {
-        ARMArchVersion = V7M;
-        ARMArchFeature = ",+v7m";
-      }
-    } else if (SubVer == '6') {
-      ARMArchVersion = V6;
-      ARMArchFeature = ",+v6";
-      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
-        ARMArchVersion = V6T2;
-        ARMArchFeature = ",+v6t2";
-      }
-    } else if (SubVer == '5') {
-      ARMArchVersion = V5T;
-      ARMArchFeature = ",+v5t";
-      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
-        ARMArchVersion = V5TE;
-        ARMArchFeature = ",+v5te";
-      }
-    } else if (SubVer == '4') {
-      if (Len >= Idx+2 && TT[Idx+1] == 't') {
-        ARMArchVersion = V4T;
-        ARMArchFeature = ",+v4t";
-      } else {
-        ARMArchVersion = V4;
-        ARMArchFeature = "";
-      }
-    }
-  }
-
-  if (TT.find("eabi") != std::string::npos)
-    TargetABI = ARM_ABI_AAPCS;
-
-  // Parse features string.  If the first entry in FS (the CPU) is missing,
-  // insert the architecture feature derived from the target triple.  This is
-  // important for setting features that are implied based on the architecture
-  // version.
-  std::string FSWithArch;
-  if (FS.empty())
-    FSWithArch = std::string(ARMArchFeature);
-  else if (FS.find(',') == 0)
-    FSWithArch = std::string(ARMArchFeature) + FS;
-  else
-    FSWithArch = FS;
-  CPUString = ParseSubtargetFeatures(FSWithArch, CPUString);
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUString);
 
   // After parsing Itineraries, set ItinData.IssueWidth.
   computeIssueWidth();
 
-  // Thumb2 implies at least V6T2.
-  if (ARMArchVersion >= V6T2)
-    ThumbMode = Thumb2;
-  else if (ThumbMode >= Thumb2)
-    ARMArchVersion = V6T2;
+  if (TT.find("eabi") != std::string::npos)
+    TargetABI = ARM_ABI_AAPCS;
 
   if (isAAPCS_ABI())
     stackAlignment = 8;
@@ -151,7 +109,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   if (!isTargetDarwin())
     UseMovt = hasV6T2Ops();
   else {
-    IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+    IsR9Reserved = ReserveR9 | !HasV6Ops;
     UseMovt = DarwinUseMOVT && hasV6T2Ops();
   }
 
@@ -247,9 +205,9 @@ void ARMSubtarget::computeIssueWidth() {
 
 bool ARMSubtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
-           TargetSubtarget::AntiDepBreakMode& Mode,
+           TargetSubtargetInfo::AntiDepBreakMode& Mode,
            RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
   CriticalPathRCs.clear();
   CriticalPathRCs.push_back(&ARM::GPRRegClass);
   return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 0271c873f191..c6508723a576 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -7,50 +7,49 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the ARM specific subclass of TargetSubtarget.
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef ARMSUBTARGET_H
 #define ARMSUBTARGET_H
 
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/ADT/Triple.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
 namespace llvm {
 class GlobalValue;
+class StringRef;
 
-class ARMSubtarget : public TargetSubtarget {
+class ARMSubtarget : public ARMGenSubtargetInfo {
 protected:
-  enum ARMArchEnum {
-    V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
-  };
-
   enum ARMProcFamilyEnum {
     Others, CortexA8, CortexA9
   };
 
-  enum ARMFPEnum {
-    None, VFPv2, VFPv3, NEON
-  };
-
-  enum ThumbTypeEnum {
-    Thumb1,
-    Thumb2
-  };
-
-  /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
-  /// V6, V6T2, V7A, V7M.
-  ARMArchEnum ARMArchVersion;
-
   /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
   ARMProcFamilyEnum ARMProcFamily;
 
-  /// ARMFPUType - Floating Point Unit type.
-  ARMFPEnum ARMFPUType;
+  /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+  /// Specify whether the target supports specific ARM ISA variants.
+  bool HasV4TOps;
+  bool HasV5TOps;
+  bool HasV5TEOps;
+  bool HasV6Ops;
+  bool HasV6T2Ops;
+  bool HasV7Ops;
+
+  /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
+  /// supported.
+  bool HasVFPv2;
+  bool HasVFPv3;
+  bool HasNEON;
 
   /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
   /// specified. Use the method useNEONForSinglePrecisionFP() to
@@ -68,11 +67,11 @@ protected:
   /// SlowFPBrcc - True if floating point compare + branch is slow.
   bool SlowFPBrcc;
 
-  /// IsThumb - True if we are in thumb mode, false if in ARM mode.
-  bool IsThumb;
+  /// InThumbMode - True if compiling for Thumb, false for ARM.
+  bool InThumbMode;
 
-  /// ThumbMode - Indicates supported Thumb version.
-  ThumbTypeEnum ThumbMode;
+  /// HasThumb2 - True if Thumb2 instructions are supported.
+  bool HasThumb2;
 
   /// NoARM - True if subtarget does not support ARM mode execution.
   bool NoARM;
@@ -128,6 +127,10 @@ protected:
   /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
   bool AllowsUnalignedMem;
 
+  /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+  /// and such) instructions in Thumb2 code.
+  bool Thumb2DSP;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -154,7 +157,8 @@ protected:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb);
+  ARMSubtarget(const std::string &TT, const std::string &CPU,
+               const std::string &FS);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -165,28 +169,28 @@ protected:
   }
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   void computeIssueWidth();
 
-  bool hasV4TOps()  const { return ARMArchVersion >= V4T;  }
-  bool hasV5TOps()  const { return ARMArchVersion >= V5T;  }
-  bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
-  bool hasV6Ops()   const { return ARMArchVersion >= V6;   }
-  bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
-  bool hasV7Ops()   const { return ARMArchVersion >= V7A;  }
+  bool hasV4TOps()  const { return HasV4TOps;  }
+  bool hasV5TOps()  const { return HasV5TOps;  }
+  bool hasV5TEOps() const { return HasV5TEOps; }
+  bool hasV6Ops()   const { return HasV6Ops;   }
+  bool hasV6T2Ops() const { return HasV6T2Ops; }
+  bool hasV7Ops()   const { return HasV7Ops;  }
 
   bool isCortexA8() const { return ARMProcFamily == CortexA8; }
   bool isCortexA9() const { return ARMProcFamily == CortexA9; }
 
   bool hasARMOps() const { return !NoARM; }
 
-  bool hasVFP2() const { return ARMFPUType >= VFPv2; }
-  bool hasVFP3() const { return ARMFPUType >= VFPv3; }
-  bool hasNEON() const { return ARMFPUType >= NEON;  }
+  bool hasVFP2() const { return HasVFPv2; }
+  bool hasVFP3() const { return HasVFPv3; }
+  bool hasNEON() const { return HasNEON;  }
   bool useNEONForSinglePrecisionFP() const {
     return hasNEON() && UseNEONForSinglePrecisionFP; }
+
   bool hasDivide() const { return HasHardwareDivide; }
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool hasDataBarrier() const { return HasDataBarrier; }
@@ -197,6 +201,7 @@ protected:
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool hasMPExtension() const { return HasMPExtension; }
+  bool hasThumb2DSP() const { return Thumb2DSP; }
 
   bool hasFP16() const { return HasFP16; }
   bool hasD16() const { return HasD16; }
@@ -209,10 +214,10 @@ protected:
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
 
-  bool isThumb() const { return IsThumb; }
-  bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
-  bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
-  bool hasThumb2() const { return ThumbMode >= Thumb2; }
+  bool isThumb() const { return InThumbMode; }
+  bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+  bool isThumb2() const { return InThumbMode && HasThumb2; }
+  bool hasThumb2() const { return HasThumb2; }
 
   bool isR9Reserved() const { return IsR9Reserved; }
 
@@ -226,7 +231,7 @@ protected:
 
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtarget::AntiDepBreakMode& Mode,
+                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
                              RegClassVector& CriticalPathRCs) const;
 
   /// getInstrItins - Return the instruction itineraies based on subtarget
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 29aa4f7ad2ce..f0b176ad6981 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMTargetMachine.h"
-#include "ARMMCAsmInfo.h"
 #include "ARMFrameLowering.h"
 #include "ARM.h"
 #include "llvm/PassManager.h"
@@ -22,15 +21,6 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
-  Triple TheTriple(TT);
-
-  if (TheTriple.isOSDarwin())
-    return new ARMMCAsmInfoDarwin();
-
-  return new ARMELFMCAsmInfo();
-}
-
 // This is duplicated code. Refactor this.
 static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCContext &Ctx, TargetAsmBackend &TAB,
@@ -56,10 +46,6 @@ extern "C" void LLVMInitializeARMTarget() {
   RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
   RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
 
-  // Register the target asm info.
-  RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
-  RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
-
   // Register the MC Code Emitter
   TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
   TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
@@ -78,18 +64,23 @@ extern "C" void LLVMInitializeARMTarget() {
 ///
 ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
                                            const std::string &TT,
-                                           const std::string &FS,
-                                           bool isThumb)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS, isThumb),
+                                           const std::string &CPU,
+                                           const std::string &FS)
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS),
     JITInfo(),
     InstrItins(Subtarget.getInstrItineraryData()) {
   DefRelocModel = getRelocationModel();
+
+  // Default to soft float ABI
+  if (FloatABIType == FloatABI::Default)
+    FloatABIType = FloatABI::Soft;
 }
 
 ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+                                   const std::string &CPU,
                                    const std::string &FS)
-  : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
+  : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
     DataLayout(Subtarget.isAPCS_ABI() ?
                std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "v128:32:128-v64:32:64-n32") :
@@ -105,8 +96,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
 }
 
 ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+                                       const std::string &CPU,
                                        const std::string &FS)
-  : ARMBaseTargetMachine(T, TT, FS, true),
+  : ARMBaseTargetMachine(T, TT, CPU, FS),
     InstrInfo(Subtarget.hasThumb2()
               ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
               : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index e0aa149c4cc2..bc3d46a50ea5 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,7 @@ private:
 
 public:
   ARMBaseTargetMachine(const Target &T, const std::string &TT,
-                       const std::string &FS, bool isThumb);
+                       const std::string &CPU, const std::string &FS);
 
   virtual       ARMJITInfo       *getJITInfo()         { return &JITInfo; }
   virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
@@ -70,7 +70,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
   ARMFrameLowering    FrameLowering;
  public:
   ARMTargetMachine(const Target &T, const std::string &TT,
-                   const std::string &FS);
+                   const std::string &CPU, const std::string &FS);
 
   virtual const ARMRegisterInfo  *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
@@ -109,7 +109,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
   OwningPtr<ARMFrameLowering> FrameLowering;
 public:
   ThumbTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &CPU, const std::string &FS);
 
   /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
   virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index 2428ce16d3d5..d9a5fa223b4b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -87,8 +87,9 @@ public:
     : ARMBaseAsmLexer(T, MAI) {
     std::string tripleString("arm-unknown-unknown");
     std::string featureString;
+    std::string CPU;
     OwningPtr<const TargetMachine>
-      targetMachine(T.createTargetMachine(tripleString, featureString));
+      targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
     InitRegisterMap(targetMachine->getRegisterInfo());
   }
 };
@@ -99,8 +100,9 @@ public:
     : ARMBaseAsmLexer(T, MAI) {
     std::string tripleString("thumb-unknown-unknown");
     std::string featureString;
+    std::string CPU;
     OwningPtr<const TargetMachine>
-      targetMachine(T.createTargetMachine(tripleString, featureString));
+      targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
     InitRegisterMap(targetMachine->getRegisterInfo());
   }
 };
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4bc12c9c2b49..a4741270c7a5 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -20,14 +20,17 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmParser.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+
 using namespace llvm;
 
 namespace {
@@ -35,8 +38,8 @@ namespace {
 class ARMOperand;
 
 class ARMAsmParser : public TargetAsmParser {
+  MCSubtargetInfo &STI;
   MCAsmParser &Parser;
-  TargetMachine &TM;
 
   MCAsmParser &getParser() const { return Parser; }
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -47,7 +50,7 @@ class ARMAsmParser : public TargetAsmParser {
   int TryParseRegister();
   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
   bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
-  bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+  int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
   bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
   bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
                    ARMII::AddrMode AddrMode);
@@ -79,6 +82,18 @@ class ARMAsmParser : public TargetAsmParser {
   void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
                              bool &CanAcceptPredicationCode);
 
+  bool isThumb() const {
+    // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+  }
+  bool isThumbOne() const {
+    return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+  }
+  void SwitchMode() {
+    unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+    setAvailableFeatures(FB);
+  }
+
   /// @name Auto-generated Match Functions
   /// {
 
@@ -113,13 +128,13 @@ class ARMAsmParser : public TargetAsmParser {
                                   const SmallVectorImpl<MCParsedAsmOperand*> &);
 
 public:
-  ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
-    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
-      MCAsmParserExtension::Initialize(_Parser);
-      // Initialize the set of available features.
-      setAvailableFeatures(ComputeAvailableFeatures(
-          &TM.getSubtarget<ARMSubtarget>()));
-    }
+  ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+    : TargetAsmParser(), STI(_STI), Parser(_Parser) {
+    MCAsmParserExtension::Initialize(_Parser);
+    // Seed the matcher with the features computed from the subtarget's
+    // current feature bits.
+    const unsigned Features = ComputeAvailableFeatures(STI.getFeatureBits());
+    setAvailableFeatures(Features);
+  }
 
   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -146,6 +161,7 @@ class ARMOperand : public MCParsedAsmOperand {
     RegisterList,
     DPRRegisterList,
     SPRRegisterList,
+    ShiftedRegister,
     Shifter,
     Token
   } Kind;
@@ -207,8 +223,14 @@ class ARMOperand : public MCParsedAsmOperand {
 
     struct {
       ARM_AM::ShiftOpc ShiftTy;
-      unsigned RegNum;
+      unsigned Imm;
     } Shift;
+    struct {
+      ARM_AM::ShiftOpc ShiftTy;
+      unsigned SrcReg;
+      unsigned ShiftReg;
+      unsigned ShiftImm;
+    } ShiftedReg;
   };
 
   ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -255,6 +277,9 @@ public:
     case Shifter:
       Shift = o.Shift;
       break;
+    case ShiftedRegister:
+      ShiftedReg = o.ShiftedReg;
+      break;
     }
   }
 
@@ -350,6 +375,46 @@ public:
   bool isCondCode() const { return Kind == CondCode; }
   bool isCCOut() const { return Kind == CCOut; }
   bool isImm() const { return Kind == Immediate; }
+  /// True for a constant immediate operand with a value in [0, 255].
+  bool isImm0_255() const {
+    const MCConstantExpr *Const =
+      Kind == Immediate ? dyn_cast<MCConstantExpr>(getImm()) : 0;
+    if (!Const) return false;
+    const int64_t V = Const->getValue();
+    return 0 <= V && V <= 255;
+  }
+  /// True for a constant immediate operand with a value in [0, 7].
+  bool isImm0_7() const {
+    const MCConstantExpr *Const =
+      Kind == Immediate ? dyn_cast<MCConstantExpr>(getImm()) : 0;
+    if (!Const) return false;
+    const int64_t V = Const->getValue();
+    return 0 <= V && V <= 7;
+  }
+  /// True for a constant immediate operand with a value in [0, 15].
+  bool isImm0_15() const {
+    const MCConstantExpr *Const =
+      Kind == Immediate ? dyn_cast<MCConstantExpr>(getImm()) : 0;
+    if (!Const) return false;
+    const int64_t V = Const->getValue();
+    return 0 <= V && V <= 15;
+  }
+  /// True for a constant immediate operand with a value in [0, 65535].
+  bool isImm0_65535() const {
+    const MCConstantExpr *Const =
+      Kind == Immediate ? dyn_cast<MCConstantExpr>(getImm()) : 0;
+    if (!Const) return false;
+    const int64_t V = Const->getValue();
+    return 0 <= V && V <= 65535;
+  }
+  /// True for a constant immediate for which getT2SOImmVal() is not -1.
+  bool isT2SOImm() const {
+    const MCConstantExpr *Const =
+      Kind == Immediate ? dyn_cast<MCConstantExpr>(getImm()) : 0;
+    if (!Const) return false;
+    const int64_t V = Const->getValue();
+    return -1 != ARM_AM::getT2SOImmVal(V);
+  }
   bool isReg() const { return Kind == Register; }
   bool isRegList() const { return Kind == RegisterList; }
   bool isDPRRegList() const { return Kind == DPRRegisterList; }
@@ -358,6 +423,7 @@ public:
   bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
   bool isMemory() const { return Kind == Memory; }
   bool isShifter() const { return Kind == Shifter; }
+  bool isShiftedReg() const { return Kind == ShiftedRegister; }
   bool isMemMode2() const {
     if (getMemAddrMode() != ARMII::AddrMode2)
       return false;
@@ -488,6 +554,18 @@ public:
     Inst.addOperand(MCOperand::CreateReg(getReg()));
   }
 
+  void addShiftedRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 3 && "Invalid number of operands!");
+    assert(isShiftedReg() && "addShiftedRegOperands() on non ShiftedReg!");
+    // ShiftImm is a raw amount here; a register shift must not also carry one.
+    assert((ShiftedReg.ShiftReg == 0 || ShiftedReg.ShiftImm == 0) &&
+           "Invalid shifted register operand!");
+    Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg));
+    Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg));
+    Inst.addOperand(MCOperand::CreateImm(
+      ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm)));
+  }
+
   void addShifterOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateImm(
@@ -515,6 +593,31 @@ public:
     addExpr(Inst, getImm());
   }
 
+  void addImm0_255Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
+  void addImm0_7Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
+  void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
+  void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
+  void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    addExpr(Inst, getImm());
+  }
+
   void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -648,7 +751,7 @@ public:
     Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
   }
 
-  virtual void dump(raw_ostream &OS) const;
+  virtual void print(raw_ostream &OS) const;
 
   static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
     ARMOperand *Op = new ARMOperand(CondCode);
@@ -699,6 +802,21 @@ public:
     return Op;
   }
 
+  /// Allocate a ShiftedRegister operand spanning source locations S to E.
+  static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
+                                           unsigned SrcReg, unsigned ShiftReg,
+                                           unsigned ShiftImm,
+                                           SMLoc S, SMLoc E) {
+    ARMOperand *Result = new ARMOperand(ShiftedRegister);
+    Result->StartLoc = S;
+    Result->EndLoc = E;
+    Result->ShiftedReg.SrcReg = SrcReg;
+    Result->ShiftedReg.ShiftTy = ShTy;
+    Result->ShiftedReg.ShiftReg = ShiftReg;
+    Result->ShiftedReg.ShiftImm = ShiftImm;
+    return Result;
+  }
+
   static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
                                    SMLoc S, SMLoc E) {
     ARMOperand *Op = new ARMOperand(Shifter);
@@ -802,7 +920,7 @@ public:
 
 } // end anonymous namespace.
 
-void ARMOperand::dump(raw_ostream &OS) const {
+void ARMOperand::print(raw_ostream &OS) const {
   switch (Kind) {
   case CondCode:
     OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
@@ -863,7 +981,15 @@ void ARMOperand::dump(raw_ostream &OS) const {
     OS << "<register " << getReg() << ">";
     break;
   case Shifter:
-    OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">";
+    OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">";
+    break;
+  case ShiftedRegister:
+    // ShiftImm is the raw shift amount, not a packed so_reg opcode, so take
+    // the shift kind from ShiftTy and print the amount as stored.
+    OS << "<so_reg " << ShiftedReg.SrcReg << " "
+       << ARM_AM::getShiftOpcStr(ShiftedReg.ShiftTy)
+       << ", " << ShiftedReg.ShiftReg << ", " << ShiftedReg.ShiftImm
+       << ">";
     break;
   case RegisterList:
   case DPRRegisterList:
@@ -927,11 +1053,12 @@ int ARMAsmParser::TryParseRegister() {
   return RegNum;
 }
 
-/// Try to parse a register name.  The token must be an Identifier when called,
-/// and if it is a register name the token is eaten and the register number is
-/// returned.  Otherwise return -1.
-///
-bool ARMAsmParser::TryParseShiftRegister(
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::TryParseShiftRegister(
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
@@ -948,18 +1075,69 @@ bool ARMAsmParser::TryParseShiftRegister(
       .Default(ARM_AM::no_shift);
 
   if (ShiftTy == ARM_AM::no_shift)
-    return true;
-
-  Parser.Lex(); // Eat shift-type operand;
-  int RegNum = TryParseRegister();
-  if (RegNum == -1)
-    return Error(Parser.getTok().getLoc(), "register expected");
+    return 1;
+
+  Parser.Lex(); // Eat the operator.
+
+  // Fold the pushed source register in; tokens are consumed, so fail hard.
+  OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+  if (!PrevOp->isReg()) {
+    Error(PrevOp->getStartLoc(), "shift must be of a register");
+    return -1;
+  }
+  int SrcReg = PrevOp->getReg();
+  int64_t Imm = 0;
+  int ShiftReg = 0;
+  if (ShiftTy == ARM_AM::rrx) {
+    // RRX doesn't have an explicit shift amount. The encoder expects
+    // the shift register to be the same as the source register. Seems odd,
+    // but OK.
+    ShiftReg = SrcReg;
+  } else {
+    // Figure out if this is shifted by a constant or a register (for non-RRX).
+    if (Parser.getTok().is(AsmToken::Hash)) {
+      Parser.Lex(); // Eat hash.
+      SMLoc ImmLoc = Parser.getTok().getLoc();
+      const MCExpr *ShiftExpr = 0;
+      if (getParser().ParseExpression(ShiftExpr)) {
+        Error(ImmLoc, "invalid immediate shift value");
+        return -1;
+      }
+      // The expression must be evaluatable as an immediate.
+      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+      if (!CE) {
+        Error(ImmLoc, "invalid immediate shift value");
+        return -1;
+      }
+      // Range check the immediate.
+      // lsl, ror: 0 <= imm <= 31
+      // lsr, asr: 0 <= imm <= 32
+      Imm = CE->getValue();
+      if (Imm < 0 ||
+          ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+          ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+        Error(ImmLoc, "immediate shift value out of range");
+        return -1;
+      }
+    } else if (Parser.getTok().is(AsmToken::Identifier)) {
+      ShiftReg = TryParseRegister();
+      SMLoc L = Parser.getTok().getLoc();
+      if (ShiftReg == -1) {
+        Error (L, "expected immediate or register in shift operand");
+        return -1;
+      }
+    } else {
+      Error (Parser.getTok().getLoc(),
+                    "expected immediate or register in shift operand");
+      return -1;
+    }
+  }
 
-  Operands.push_back(ARMOperand::CreateReg(RegNum,S, Parser.getTok().getLoc()));
-  Operands.push_back(ARMOperand::CreateShifter(ShiftTy,
+  Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+                                                       ShiftReg, Imm,
                                                S, Parser.getTok().getLoc()));
 
-  return false;
+  return 0;
 }
 
 
@@ -1162,10 +1340,14 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
     .Case("sy",    ARM_MB::SY)
     .Case("st",    ARM_MB::ST)
+    .Case("sh",    ARM_MB::ISH)
     .Case("ish",   ARM_MB::ISH)
+    .Case("shst",  ARM_MB::ISHST)
     .Case("ishst", ARM_MB::ISHST)
     .Case("nsh",   ARM_MB::NSH)
+    .Case("un",    ARM_MB::NSH)
     .Case("nshst", ARM_MB::NSHST)
+    .Case("unst",  ARM_MB::NSHST)
     .Case("osh",   ARM_MB::OSH)
     .Case("oshst", ARM_MB::OSHST)
     .Default(~0U);
@@ -1604,15 +1786,18 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
   default:
     Error(Parser.getTok().getLoc(), "unexpected token in operand");
     return true;
-  case AsmToken::Identifier:
+  case AsmToken::Identifier: {
     if (!TryParseRegisterWithWriteBack(Operands))
       return false;
-    if (!TryParseShiftRegister(Operands))
+    int Res = TryParseShiftRegister(Operands);
+    if (Res == 0) // success
       return false;
-
+    else if (Res == -1) // irrecoverable error
+      return true;
 
     // Fall though for the Identifier case that is not a register or a
     // special name.
+  }
   case AsmToken::Integer: // things like 1f and 2b as a branch targets
   case AsmToken::Dot: {   // . as a branch target
     // This was not a register so parse other operands that start with an
@@ -1761,30 +1946,35 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
       Mnemonic == "vcle" ||
       (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
        Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
-       Mnemonic == "vqdmlal"))
+       Mnemonic == "vqdmlal" || Mnemonic == "bics"))
     return Mnemonic;
 
-  // First, split out any predication code.
-  unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
-    .Case("eq", ARMCC::EQ)
-    .Case("ne", ARMCC::NE)
-    .Case("hs", ARMCC::HS)
-    .Case("lo", ARMCC::LO)
-    .Case("mi", ARMCC::MI)
-    .Case("pl", ARMCC::PL)
-    .Case("vs", ARMCC::VS)
-    .Case("vc", ARMCC::VC)
-    .Case("hi", ARMCC::HI)
-    .Case("ls", ARMCC::LS)
-    .Case("ge", ARMCC::GE)
-    .Case("lt", ARMCC::LT)
-    .Case("gt", ARMCC::GT)
-    .Case("le", ARMCC::LE)
-    .Case("al", ARMCC::AL)
-    .Default(~0U);
-  if (CC != ~0U) {
-    Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
-    PredicationCode = CC;
+  // First, split out any predication code. Ignore mnemonics we know aren't
+  // predicated but do have a carry-set and so weren't caught above.
+  if (Mnemonic != "adcs") {
+    unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+      .Case("eq", ARMCC::EQ)
+      .Case("ne", ARMCC::NE)
+      .Case("hs", ARMCC::HS)
+      .Case("cs", ARMCC::HS)
+      .Case("lo", ARMCC::LO)
+      .Case("cc", ARMCC::LO)
+      .Case("mi", ARMCC::MI)
+      .Case("pl", ARMCC::PL)
+      .Case("vs", ARMCC::VS)
+      .Case("vc", ARMCC::VC)
+      .Case("hi", ARMCC::HI)
+      .Case("ls", ARMCC::LS)
+      .Case("ge", ARMCC::GE)
+      .Case("lt", ARMCC::LT)
+      .Case("gt", ARMCC::GT)
+      .Case("le", ARMCC::LE)
+      .Case("al", ARMCC::AL)
+      .Default(~0U);
+    if (CC != ~0U) {
+      Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+      PredicationCode = CC;
+    }
   }
 
   // Next, determine if we have a carry setting bit. We explicitly ignore all
@@ -1824,8 +2014,6 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
 void ARMAsmParser::
 GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
                       bool &CanAcceptPredicationCode) {
-  bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb();
-
   if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
       Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
       Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
@@ -1834,7 +2022,7 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
       Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
       Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
       Mnemonic == "eor" || Mnemonic == "smlal" ||
-      (Mnemonic == "mov" && !isThumb)) {
+      (Mnemonic == "mov" && !isThumbOne())) {
     CanAcceptCarrySet = true;
   } else {
     CanAcceptCarrySet = false;
@@ -1851,10 +2039,9 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
     CanAcceptPredicationCode = true;
   }
 
-  if (isThumb)
+  if (isThumb())
     if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
-        Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp" ||
-        Mnemonic == "mov")
+        Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
       CanAcceptPredicationCode = false;
 }
 
@@ -1884,20 +2071,22 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
   bool CanAcceptCarrySet, CanAcceptPredicationCode;
   GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
 
+  // If we had a carry-set on an instruction that can't do that, issue an
+  // error.
+  if (!CanAcceptCarrySet && CarrySetting) {
+    Parser.EatToEndOfStatement();
+    return Error(NameLoc, "instruction '" + Head +
+                 "' can not set flags, but 's' suffix specified");
+  }
+
   // Add the carry setting operand, if necessary.
   //
   // FIXME: It would be awesome if we could somehow invent a location such that
   // match errors on this operand would print a nice diagnostic about how the
   // 's' character in the mnemonic resulted in a CCOut operand.
-  if (CanAcceptCarrySet) {
+  if (CanAcceptCarrySet)
     Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
                                                NameLoc));
-  } else {
-    // This mnemonic can't ever accept a carry set, but the user wrote one (or
-    // misspelled another mnemonic).
-
-    // FIXME: Issue a nice error.
-  }
 
   // Add the predication code operand, if necessary.
   if (CanAcceptPredicationCode) {
@@ -1988,7 +2177,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
     // that updates the condition codes if it ends in 's'.  So see if the
     // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
     // operand with a value of CPSR.
-    else if(MatchResult == Match_MnemonicFail) {
+    else if (MatchResult == Match_MnemonicFail) {
       // Get the instruction mnemonic, which is the first token.
       StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
       if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
@@ -2174,20 +2363,15 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
     return Error(Parser.getTok().getLoc(), "unexpected token in directive");
   Parser.Lex();
 
-  // FIXME: We need to be able switch subtargets at this point so that
-  // MatchInstructionImpl() will work when it gets the AvailableFeatures which
-  // includes Feature_IsThumb or not to match the right instructions.  This is
-  // blocked on the FIXME in llvm-mc.cpp when creating the TargetMachine.
-  if (Val == 16){
-    assert(TM.getSubtarget<ARMSubtarget>().isThumb() &&
-	   "switching between arm/thumb not yet suppported via .code 16)");
+  if (Val == 16) {
+    if (!isThumb())
+      SwitchMode();
     getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
-  }
-  else{
-    assert(!TM.getSubtarget<ARMSubtarget>().isThumb() &&
-           "switching between thumb/arm not yet suppported via .code 32)");
+  } else {
+    if (isThumb())
+      SwitchMode();
     getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
-   }
+  }
 
   return false;
 }
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index d3b8b54e76b8..21608d0b62fd 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,18 +1,16 @@
 set(LLVM_TARGET_DEFINITIONS ARM.td)
 
-tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(ARMGenRegisterNames.inc -gen-register-enums)
-tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
-tablegen(ARMGenInstrNames.inc -gen-instr-enums)
-tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
+tablegen(ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(ARMGenInstrInfo.inc -gen-instr-info)
 tablegen(ARMGenCodeEmitter.inc -gen-emitter)
 tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
 tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
 tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(ARMGenDAGISel.inc -gen-dag-isel)
 tablegen(ARMGenFastISel.inc -gen-fast-isel)
 tablegen(ARMGenCallingConv.inc -gen-callingconv)
-tablegen(ARMGenSubtarget.inc -gen-subtarget)
+tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
 tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
 tablegen(ARMGenDecoderTables.inc -gen-arm-decoder)
 
@@ -34,10 +32,10 @@ add_llvm_target(ARMCodeGen
   ARMISelLowering.cpp
   ARMInstrInfo.cpp
   ARMJITInfo.cpp
+  ARMMachObjectWriter.cpp
   ARMMCCodeEmitter.cpp
   ARMMCExpr.cpp
   ARMLoadStoreOptimizer.cpp
-  ARMMCAsmInfo.cpp
   ARMMCInstLower.cpp
   ARMRegisterInfo.cpp
   ARMSelectionDAGInfo.cpp
@@ -67,3 +65,4 @@ add_subdirectory(TargetInfo)
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index 271ca8c72f08..d89c80a9d457 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -24,8 +24,8 @@
 //#define DEBUG(X) do { X; } while (0)
 
 /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
-/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
-/// describing the operand info for each ARMInsts[i].
+/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the
+/// operand info for each ARMInsts[i].
 ///
 /// Together with an instruction's encoding format, we can take advantage of the
 /// NumOperands and the OpInfo fields of the target instruction description in
@@ -46,10 +46,10 @@
 ///   dag DefaultOps = (ops (i32 14), (i32 zero_reg));
 /// }
 ///
-/// which is manifested by the TargetOperandInfo[] of:
+/// which is manifested by the MCOperandInfo[] of:
 ///
-/// { 0, 0|(1<<TOI::Predicate), 0 },
-/// { ARM::CCRRegClassID, 0|(1<<TOI::Predicate), 0 }
+/// { 0, 0|(1<<MCOI::Predicate), 0 },
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
 ///
 /// So the first predicate MCOperand corresponds to the immediate part of the
 /// ARM condition field (Inst{31-28}), and the second predicate MCOperand
@@ -66,12 +66,14 @@
 ///   dag DefaultOps = (ops (i32 zero_reg));
 /// }
 ///
-/// which is manifested by the one TargetOperandInfo of:
+/// which is manifested by the one MCOperandInfo of:
 ///
-/// { ARM::CCRRegClassID, 0|(1<<TOI::OptionalDef), 0 }
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
 ///
-/// And this maps to one MCOperand with the regsiter kind of ARM::CPSR.
-#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
 
 using namespace llvm;
 
@@ -588,9 +590,9 @@ static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
 static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  unsigned short NumDefs = MCID.getNumDefs();
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -739,9 +741,9 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
     if (PW) {
       MI.addOperand(MCOperand::CreateReg(0));
       ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
-      const TargetInstrDesc &TID = ARMInsts[Opcode];
+      const MCInstrDesc &MCID = ARMInsts[Opcode];
       unsigned IndexMode =
-                  (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+                 (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
       unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
                                           ARM_AM::no_shift, IndexMode);
       MI.addOperand(MCOperand::CreateImm(Offset));
@@ -802,7 +804,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (CoprocessorOpcode(Opcode))
     return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   // MRS and MRSsys take one GPR reg Rd.
@@ -901,7 +903,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   unsigned &OpIdx = NumOpsAdded;
@@ -976,10 +978,10 @@ static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
 static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
-  bool isUnary = isUnaryDP(TID.TSFlags);
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  unsigned short NumDefs = MCID.getNumDefs();
+  bool isUnary = isUnaryDP(MCID.TSFlags);
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1041,7 +1043,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   }
 
   // If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
-  if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) {
+  if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
     MI.addOperand(MCOperand::CreateReg(0));
     ++OpIdx;
   }
@@ -1089,10 +1091,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  unsigned short NumDefs = TID.getNumDefs();
-  bool isUnary = isUnaryDP(TID.TSFlags);
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  unsigned short NumDefs = MCID.getNumDefs();
+  bool isUnary = isUnaryDP(MCID.TSFlags);
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1118,7 +1120,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   }
 
   // If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
-  if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) {
+  if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
     MI.addOperand(MCOperand::CreateReg(0));
     ++OpIdx;
   }
@@ -1244,17 +1246,17 @@ static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBac
 static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  bool isPrePost = isPrePostLdSt(TID.TSFlags);
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   if (!OpInfo) return false;
 
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
 
-  assert(((!isStore && TID.getNumDefs() > 0) ||
-          (isStore && (TID.getNumDefs() == 0 || isPrePost)))
+  assert(((!isStore && MCID.getNumDefs() > 0) ||
+          (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
          && "Invalid arguments");
 
   // Operand 0 of a pre- and post-indexed store is the address base writeback.
@@ -1291,7 +1293,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
          "Reg operand expected");
-  assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
+  assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
          && "Index mode or tied_to operand expected");
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRn(insn))));
@@ -1308,7 +1310,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
   unsigned IndexMode =
-               (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+               (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
   if (getIBit(insn) == 0) {
     // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
     // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
@@ -1379,17 +1381,17 @@ static bool HasDualReg(unsigned Opcode) {
 static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  bool isPrePost = isPrePostLdSt(TID.TSFlags);
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   if (!OpInfo) return false;
 
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
 
-  assert(((!isStore && TID.getNumDefs() > 0) ||
-          (isStore && (TID.getNumDefs() == 0 || isPrePost)))
+  assert(((!isStore && MCID.getNumDefs() > 0) ||
+          (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
          && "Invalid arguments");
 
   // Operand 0 of a pre- and post-indexed store is the address base writeback.
@@ -1433,7 +1435,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
          "Reg operand expected");
-  assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
+  assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
          && "Offset mode or tied_to operand expected");
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      decodeRn(insn))));
@@ -1451,7 +1453,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
   unsigned IndexMode =
-                  (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+                 (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
   if (getAM3IBit(insn) == 1) {
     MI.addOperand(MCOperand::CreateReg(0));
 
@@ -1539,7 +1541,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   unsigned &OpIdx = NumOpsAdded;
@@ -1591,7 +1593,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1653,8 +1655,8 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
     return false;
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
 
   // Disassemble register def.
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -1696,7 +1698,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
     return false;
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1802,7 +1804,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1842,8 +1844,8 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1858,7 +1860,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
   ++OpIdx;
 
   // Skip tied_to operand constraint.
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
     assert(NumOps >= 4 && "Expect >=4 operands");
     MI.addOperand(MCOperand::CreateReg(0));
     ++OpIdx;
@@ -1886,8 +1888,8 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   if (!OpInfo) return false;
 
   bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
@@ -1903,7 +1905,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
                     getRegisterEnum(B, RegClassID,
                                     decodeVFPRd(insn, SP))));
 
-    assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+    assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
            "Tied to operand expected");
     MI.addOperand(MI.getOperand(0));
 
@@ -1961,7 +1963,7 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -2011,7 +2013,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -2136,7 +2138,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -2402,8 +2404,8 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
     unsigned alignment, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
 
   // At least one DPR register plus addressing mode #6.
   assert(NumOps >= 3 && "Expect >= 3 operands");
@@ -2507,7 +2509,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
     }
 
     while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
-      assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 &&
+      assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
              "Tied to operand expected");
       MI.addOperand(MCOperand::CreateReg(0));
       ++OpIdx;
@@ -2757,8 +2759,8 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
 
   assert(NumOps >= 2 &&
          (OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -2848,8 +2850,8 @@ enum N2VFlag {
 static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opc];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opc];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
 
   assert(NumOps >= 2 &&
          (OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -2878,7 +2880,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
   ++OpIdx;
 
   // VPADAL...
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
     // TIED_TO operand.
     MI.addOperand(MCOperand::CreateReg(0));
     ++OpIdx;
@@ -2892,7 +2894,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
   // VZIP and others have two TIED_TO reg operands.
   int Idx;
   while (OpIdx < NumOps &&
-         (Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+         (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
     // Add TIED_TO operand.
     MI.addOperand(MI.getOperand(Idx));
     ++OpIdx;
@@ -2945,8 +2947,8 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
 static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
 
   assert(NumOps >= 3 &&
          (OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -2964,7 +2966,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
                                                      decodeNEONRd(insn))));
   ++OpIdx;
 
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
     // TIED_TO operand.
     MI.addOperand(MCOperand::CreateReg(0));
     ++OpIdx;
@@ -3044,8 +3046,8 @@ enum N3VFlag {
 static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
 
   // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
   assert(NumOps >= 3 &&
@@ -3076,7 +3078,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
   ++OpIdx;
 
   // VABA, VABAL, VBSLd, VBSLq, ...
-  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
     // TIED_TO operand.
     MI.addOperand(MCOperand::CreateReg(0));
     ++OpIdx;
@@ -3091,11 +3093,6 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
                                          : decodeNEONRm(insn))));
   ++OpIdx;
 
-  // Special case handling for VMOVDneon and VMOVQ because they are marked as
-  // N3RegFrm.
-  if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ)
-    return true;
-
   // Dm = Inst{5:3-0} => NEON Rm
   // or
   // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
@@ -3163,8 +3160,8 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
 static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 3 &&
@@ -3192,7 +3189,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   // Process tied_to operand constraint.
   int Idx;
-  if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+  if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
     MI.addOperand(MI.getOperand(Idx));
     ++OpIdx;
   }
@@ -3221,11 +3218,11 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   if (!OpInfo) return false;
 
-  assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
+  assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::GPRRegClassID &&
          OpInfo[1].RegClass == ARM::DPRRegClassID &&
          OpInfo[2].RegClass < 0 &&
@@ -3255,14 +3252,14 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   if (!OpInfo) return false;
 
-  assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
+  assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::DPRRegClassID &&
          OpInfo[1].RegClass == ARM::DPRRegClassID &&
-         TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+         MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
          OpInfo[2].RegClass == ARM::GPRRegClassID &&
          OpInfo[3].RegClass < 0 &&
          "Expect >= 3 operands with one dst operand");
@@ -3294,7 +3291,7 @@ static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
 
   assert(NumOps >= 2 &&
          (OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -3379,7 +3376,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  if (Opcode == ARM::DMB || Opcode == ARM::DSB) {
+  if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) {
     // Inst{3-0} encodes the memory barrier option for the variants.
     unsigned opt = slice(insn, 3, 0);
     switch (opt) {
@@ -3604,11 +3601,11 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
 
   assert(NumOpsRemaining > 0 && "Invalid argument");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned Idx = MI.getNumOperands();
 
   // First, we check whether this instr specifies the PredicateOperand through
-  // a pair of TargetOperandInfos with isPredicate() property.
+  // a pair of MCOperandInfos with isPredicate() property.
   if (NumOpsRemaining >= 2 &&
       OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
       OpInfo[Idx].RegClass < 0 &&
@@ -3636,13 +3633,13 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
 
   assert(NumOpsRemaining > 0 && "Invalid argument");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   const std::string &Name = ARMInsts[Opcode].Name;
   unsigned Idx = MI.getNumOperands();
   uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
 
   // First, we check whether this instr specifies the PredicateOperand through
-  // a pair of TargetOperandInfos with isPredicate() property.
+  // a pair of MCOperandInfos with isPredicate() property.
   if (NumOpsRemaining >= 2 &&
       OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
       OpInfo[Idx].RegClass < 0 &&
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 9639c8a4b1bd..834c6f65295d 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -350,7 +350,7 @@ static inline unsigned decodeRotate(uint32_t insn) {
 static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -425,8 +425,8 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -454,7 +454,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
   assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
          && "Thumb reg operand expected");
   int Idx;
-  if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+  if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
     // The reg operand is tied to the first reg operand.
     MI.addOperand(MI.getOperand(Idx));
     ++OpIdx;
@@ -511,8 +511,8 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
     return true;
   }
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -530,7 +530,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(OpIdx < NumOps && "More operands expected");
   int Idx;
-  if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+  if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
     // The reg operand is tied to the first reg operand.
     MI.addOperand(MI.getOperand(Idx));
     ++OpIdx;
@@ -554,7 +554,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -602,7 +602,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 2 &&
@@ -630,8 +630,8 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
 static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   assert(NumOps >= 2
@@ -680,7 +680,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
   assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
          && "Unexpected opcode");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 3 &&
@@ -708,7 +708,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -733,7 +733,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 3 &&
@@ -810,7 +810,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
     return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
 
   // Predicate operands are handled elsewhere.
   if (NumOps == 2 &&
@@ -958,7 +958,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
   if (Opcode == ARM::tTRAP)
     return true;
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
@@ -989,7 +989,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
@@ -1226,7 +1226,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   unsigned &OpIdx = NumOpsAdded;
@@ -1316,7 +1316,7 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
   if (!OpInfo) return false;
 
   assert(NumOps >= 4
@@ -1423,8 +1423,8 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
 static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   // Special case handling.
@@ -1467,7 +1467,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   if (ThreeReg) {
     int Idx;
-    if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+    if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
       // Process tied_to operand constraint.
       MI.addOperand(MI.getOperand(Idx));
       ++OpIdx;
@@ -1521,8 +1521,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1550,7 +1550,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
       return false;
     }
     int Idx;
-    if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+    if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
       // The reg operand is tied to the first reg operand.
       MI.addOperand(MI.getOperand(Idx));
     } else {
@@ -1590,8 +1590,8 @@ static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
 /// o t2SSAT16, t2USAT16: Rs sat_pos Rn
 static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
                                  unsigned &NumOpsAdded, BO B) {
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
 
   // Disassemble the register def.
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
@@ -1635,8 +1635,8 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
     uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -1659,7 +1659,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
   if (TwoReg) {
     assert(NumOps >= 3 && "Expect >= 3 operands");
     int Idx;
-    if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+    if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
       // Process tied_to operand constraint.
       MI.addOperand(MI.getOperand(Idx));
     } else {
@@ -1907,8 +1907,8 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
   // t2PLDs:                      Rn Rm imm2=Inst{5-4}
   // Same pattern applies for t2PLDW* and t2PLI*.
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -2073,8 +2073,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
   // See, for example, A6.3.7 Load word: Table A6-18 Load word.
   if (Load && Rn == 15)
     return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -2085,7 +2085,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
          "Expect >= 3 operands and first two as reg operands");
 
   bool ThreeReg = (OpInfo[2].RegClass > 0);
-  bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1;
+  bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1;
   bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
 
   // Build the register operands, followed by the immediate.
@@ -2160,8 +2160,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
 static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetInstrDesc &TID = ARMInsts[Opcode];
-  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  const MCInstrDesc &MCID = ARMInsts[Opcode];
+  const MCOperandInfo *OpInfo = MCID.OpInfo;
   unsigned &OpIdx = NumOpsAdded;
 
   OpIdx = 0;
@@ -2214,7 +2214,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
 
   assert(NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::rGPRRegClassID &&
@@ -2259,7 +2259,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
 static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
 
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
 
   assert(NumOps >= 3 &&
          OpInfo[0].RegClass == ARM::rGPRRegClassID &&
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 8ae87f81cc1e..78d3e477975c 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -126,38 +126,6 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   }
 }
 
-static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
-                       const MCAsmInfo *MAI) {
-  // Break it up into two parts that make up a shifter immediate.
-  V = ARM_AM::getSOImmVal(V);
-  assert(V != -1 && "Not a valid so_imm value!");
-
-  unsigned Imm = ARM_AM::getSOImmValImm(V);
-  unsigned Rot = ARM_AM::getSOImmValRot(V);
-
-  // Print low-level immediate formation info, per
-  // A5.2.3: Data-processing (immediate), and
-  // A5.2.4: Modified immediate constants in ARM instructions
-  if (Rot) {
-    O << "#" << Imm << ", #" << Rot;
-    // Pretty printed version.
-    if (CommentStream)
-      *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
-  } else {
-    O << "#" << Imm;
-  }
-}
-
-
-/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
-/// immediate in bits 0-7.
-void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isImm() && "Not a valid so_imm value!");
-  printSOImm(O, MO.getImm(), CommentStream, &MAI);
-}
-
 // so_reg is a 4-operand unit corresponding to register forms of the A5.1
 // "Addressing Mode 1 - Data-processing operands" forms.  This includes:
 //    REG 0   0           - e.g. R5
@@ -174,6 +142,8 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
   // Print the shift opc.
   ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
   O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+  if (ShOpc == ARM_AM::rrx)
+    return;
   if (MO2.getReg()) {
     O << ' ' << getRegisterName(MO2.getReg());
     assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index bde0eb9199a9..d5f238bb8a61 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -19,11 +19,10 @@
 namespace llvm {
 
 class MCOperand;
-class TargetMachine;
 
 class ARMInstPrinter : public MCInstPrinter {
 public:
-  ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+  ARMInstPrinter(const MCAsmInfo &MAI)
     : MCInstPrinter(MAI) {}
 
   virtual void printInst(const MCInst *MI, raw_ostream &O);
@@ -39,8 +38,6 @@ public:
 
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 
-  void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
   void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
 
   void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 53b4c95d3801..53b4c95d3801 100644
--- a/lib/Target/ARM/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
diff --git a/lib/Target/ARM/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 90f7822ea580..90f7822ea580 100644
--- a/lib/Target/ARM/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 000000000000..f8fcf2b8aff1
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,144 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "ARMMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::string ARM_MC::ParseARMTriple(StringRef TT) {
+  // Derive the default subtarget feature string implied by an "armvN..." or
+  // "thumb[vN...]" triple prefix; the result is empty when none is implied.
+  unsigned Len = TT.size();
+  unsigned Idx = 0;
+
+  // FIXME: Enhance Triple helper class to extract ARM version.
+  bool isThumb = false;
+  if (Len >= 5 && TT.substr(0, 4) == "armv")
+    Idx = 4;
+  else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+    isThumb = true;
+    if (Len >= 7 && TT[5] == 'v')
+      Idx = 6;
+  }
+
+  std::string ARMArchFeature;
+  if (Idx) {
+    unsigned SubVer = TT[Idx];
+    if (SubVer >= '7' && SubVer <= '9') {
+      if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+        // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv
+        ARMArchFeature = "+v7,+noarm,+db,+hwdiv";
+      } else if (Len >= Idx+3 && TT[Idx+1] == 'e' && TT[Idx+2] == 'm') {
+        // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+        //       FeatureT2XtPk
+        ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk";
+      } else
+        // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2
+        ARMArchFeature = "+v7,+neon,+db,+t2dsp";
+    } else if (SubVer == '6') {
+      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+        ARMArchFeature = "+v6t2";
+      else
+        ARMArchFeature = "+v6";
+    } else if (SubVer == '5') {
+      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+        ARMArchFeature = "+v5te";
+      else
+        ARMArchFeature = "+v5t";
+    } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
+      ARMArchFeature = "+v4t";
+  }
+
+  if (isThumb) {
+    if (ARMArchFeature.empty())
+      ARMArchFeature = "+thumb-mode";
+    else
+      ARMArchFeature += ",+thumb-mode";
+  }
+
+  return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                  StringRef FS) {
+  // Triple-implied features come first; the caller's FS is appended after them.
+  std::string Features = ARM_MC::ParseARMTriple(TT);
+  if (!FS.empty()) {
+    if (Features.empty())
+      Features = FS;
+    else
+      Features = Features + "," + FS.str();
+  }
+  MCSubtargetInfo *STI = new MCSubtargetInfo();
+  InitARMMCSubtargetInfo(STI, TT, CPU, Features);
+  return STI;
+}
+
+// Force static initialization; ARM and Thumb register the same factory.
+extern "C" void LLVMInitializeARMMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+                                          ARM_MC::createARMMCSubtargetInfo);
+  TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+                                          ARM_MC::createARMMCSubtargetInfo);
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+  MCInstrInfo *InstrInfo = new MCInstrInfo();
+  InitARMMCInstrInfo(InstrInfo);
+  return InstrInfo;
+}
+
+extern "C" void LLVMInitializeARMMCInstrInfo() { // One factory, both targets.
+  TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+  TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo() {
+  MCRegisterInfo *RegInfo = new MCRegisterInfo();
+  InitARMMCRegisterInfo(RegInfo);
+  return RegInfo;
+}
+
+extern "C" void LLVMInitializeARMMCRegInfo() { // One factory, both targets.
+  TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+  TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+  // Darwin triples get the Darwin flavour; every other OS gets the ELF one.
+  Triple ParsedTriple(TT);
+  if (!ParsedTriple.isOSDarwin())
+    return new ARMELFMCAsmInfo();
+
+  return new ARMMCAsmInfoDarwin();
+}
+
+extern "C" void LLVMInitializeARMMCAsmInfo() {
+  // Hook the asm-info factory up to both the ARM and the Thumb target.
+  RegisterMCAsmInfoFn RegisterARM(TheARMTarget, createARMMCAsmInfo);
+  RegisterMCAsmInfoFn RegisterThumb(TheThumbTarget, createARMMCAsmInfo);
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 000000000000..74701e3516dc
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,52 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCTARGETDESC_H
+#define ARMMCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheARMTarget, TheThumbTarget;
+
+namespace ARM_MC {
+  std::string ParseARMTriple(StringRef TT);
+
+  /// createARMMCSubtargetInfo - Create an ARM MCSubtargetInfo instance.
+  /// This is exposed so the asm parser, etc. do not need to go through
+  /// TargetRegistry.
+  MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                            StringRef FS);
+}
+
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..68daf42c9191
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMARMDesc
+  ARMMCTargetDesc.cpp
+  ARMMCAsmInfo.cpp
+  )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/ARM/MCTargetDesc/Makefile b/lib/Target/ARM/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..448ed9df2bff
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f6d024232eae..2df00538b39f 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -137,11 +137,11 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
 
 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
   // FIXME: Detect integer instructions properly.
-  const TargetInstrDesc &TID = MI->getDesc();
-  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
-  if (TID.mayStore())
+  const MCInstrDesc &MCID = MI->getDesc();
+  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+  if (MCID.mayStore())
     return false;
-  unsigned Opcode = TID.getOpcode();
+  unsigned Opcode = MCID.getOpcode();
   if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
     return false;
   if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
@@ -218,18 +218,18 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
   unsigned PredReg = MI->getOperand(++NextOp).getReg();
 
-  const TargetInstrDesc &TID1 = TII->get(MulOpc);
-  const TargetInstrDesc &TID2 = TII->get(AddSubOpc);
-  unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI));
+  const MCInstrDesc &MCID1 = TII->get(MulOpc);
+  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+  unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
 
-  MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg)
+  MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
     .addReg(Src1Reg, getKillRegState(Src1Kill))
     .addReg(Src2Reg, getKillRegState(Src2Kill));
   if (HasLane)
     MIB.addImm(LaneImm);
   MIB.addImm(Pred).addReg(PredReg);
 
-  MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2)
+  MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
     .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
 
   if (NegAcc) {
@@ -273,15 +273,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
       continue;
     }
 
-    const TargetInstrDesc &TID = MI->getDesc();
-    if (TID.isBarrier()) {
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.isBarrier()) {
       clearStack();
       Skip = 0;
       ++MII;
       continue;
     }
 
-    unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+    unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
     if (Domain == ARMII::DomainGeneral) {
       if (++Skip == 2)
         // Assume dual issues of non-VFP / NEON instructions.
@@ -291,7 +291,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
 
       unsigned MulOpc, AddSubOpc;
       bool NegAcc, HasLane;
-      if (!TII->isFpMLxInstruction(TID.getOpcode(),
+      if (!TII->isFpMLxInstruction(MCID.getOpcode(),
                                    MulOpc, AddSubOpc, NegAcc, HasLane) ||
           !FindMLxHazard(MI))
         pushStack(MI);
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 65a6494986fe..eb8c60354476 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -12,14 +12,14 @@ LIBRARYNAME = LLVMARMCodeGen
 TARGET = ARM
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
-                ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
-                ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
-                ARMGenDAGISel.inc ARMGenSubtarget.inc \
+BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
+		ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
+                ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
                 ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
                 ARMGenDecoderTables.inc ARMGenEDInfo.inc \
-                ARMGenFastISel.inc ARMGenMCCodeEmitter.inc
+                ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+		ARMGenMCPseudoLowering.inc
 
-DIRS = InstPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 965665c2821a..c85d1e99705a 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -77,7 +77,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
       }
 
       if (inNEONDomain(Domain, isA8)) {
-        // Convert VMOVD to VMOVDneon
+        // Convert VMOVD to VORRd
         unsigned DestReg = MI->getOperand(0).getReg();
 
         DEBUG({errs() << "vmov convert: "; MI->dump();});
@@ -88,7 +88,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
         //  - The imp-defs / imp-uses are superregs only, we don't care about
         //    them.
         AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
-                             TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg));
+                             TII->get(ARM::VORRd), DestReg)
+          .addReg(SrcReg).addReg(SrcReg));
         MBB.erase(MI);
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 8ba9a27e95c8..2f6842e8cb60 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -681,3 +681,21 @@ is compiled and optimized to:
     str    r1, [r0]
 
 //===---------------------------------------------------------------------===//
+
+Improve codegen for selects:
+if (x != 0) x = 1
+if (x == 1) x = 1
+
+ARM codegen used to look like this:
+       mov     r1, r0
+       cmp     r1, #1
+       mov     r0, #0
+       moveq   r0, #1
+
+The naive lowering selects between two different values. It should recognize
+that this is an equality test, so it's more a conditional move than a select:
+       cmp     r0, #1
+       movne   r0, #0
+
+Currently this is an ARM-specific DAG combine. We probably should make it into a
+target-neutral one.
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index dee3d278203f..c258870e48a5 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -136,8 +136,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
       .addFrameIndex(FramePtrSpillFI).addImm(0)
       .setMIFlags(MachineInstr::FrameSetup);
-    if (NumBytes > 7)
-      // If offset is > 7 then sp cannot be adjusted in a single instruction,
+    if (NumBytes > 508)
+      // If offset is > 508 then sp cannot be adjusted in a single instruction,
       // try restoring from fp instead.
       AFI->setShouldRestoreSPFromFP(true);
   }
@@ -160,7 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   // will be allocated after this, so we can still use the base pointer
   // to reference locals.
   if (RegInfo->hasBasePointer(MF))
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
+    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+                   .addReg(ARM::SP));
 
   // If the frame has variable sized objects then the epilogue must restore
   // the sp from fp. We can assume there's an FP here since hasFP already
@@ -177,7 +178,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
 }
 
 static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
-  if (MI->getOpcode() == ARM::tRestore &&
+  if (MI->getOpcode() == ARM::tLDRspi &&
       MI->getOperand(1).isFI() &&
       isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
     return true;
@@ -239,11 +240,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
                "No scratch register to restore SP from FP!");
         emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                   TII, *RegInfo);
-        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
-          .addReg(ARM::R4);
+        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                               ARM::SP)
+          .addReg(ARM::R4));
       } else
-        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
-          .addReg(FramePtr);
+        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                               ARM::SP)
+          .addReg(FramePtr));
     } else {
       if (MBBI->getOpcode() == ARM::tBX_RET &&
           &MBB.front() != MBBI &&
@@ -270,8 +273,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
 
     emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
 
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
-      .addReg(ARM::R3, RegState::Kill);
+    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+      .addReg(ARM::R3, RegState::Kill));
     // erase the old tBX_RET instruction
     MBB.erase(MBBI);
   }
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 3fbb43340c3f..218311d78d30 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -13,7 +13,6 @@
 
 #include "Thumb1InstrInfo.h"
 #include "ARM.h"
-#include "ARMGenInstrInfo.inc"
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,18 +36,8 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
-  bool tDest = ARM::tGPRRegClass.contains(DestReg);
-  bool tSrc  = ARM::tGPRRegClass.contains(SrcReg);
-  unsigned Opc = ARM::tMOVgpr2gpr;
-  if (tDest && tSrc)
-    Opc = ARM::tMOVr;
-  else if (tSrc)
-    Opc = ARM::tMOVtgpr2gpr;
-  else if (tDest)
-    Opc = ARM::tMOVgpr2tgpr;
-
-  BuildMI(MBB, I, DL, get(Opc), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+  AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc)));
   assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
          "Thumb1 can only copy GPR registers");
 }
@@ -76,7 +65,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                               MachineMemOperand::MOStore,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   }
@@ -105,7 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                               MachineMemOperand::MOLoad,
                               MFI.getObjectSize(FI),
                               MFI.getObjectAlignment(FI));
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   }
 }
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6bf565068e4a..4eb0b6c93e1d 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -239,13 +239,13 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
       unsigned Chunk = (1 << 3) - 1;
       unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
       Bytes -= ThisVal;
-      const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+      const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
       const MachineInstrBuilder MIB =
-        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags));
+        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags));
       AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
     } else {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
-        .addReg(BaseReg, RegState::Kill)
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+        .addReg(BaseReg, RegState::Kill))
         .setMIFlags(MIFlags);
     }
     BaseReg = DestReg;
@@ -291,8 +291,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
   }
 
   if (ExtraOpc) {
-    const TargetInstrDesc &TID = TII.get(ExtraOpc);
-    AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+    const MCInstrDesc &MCID = TII.get(ExtraOpc);
+    AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
                    .addReg(DestReg, RegState::Kill)
                    .addImm(((unsigned)NumBytes) & 3)
                    .setMIFlags(MIFlags));
@@ -360,8 +360,8 @@ static void emitThumbConstant(MachineBasicBlock &MBB,
   if (Imm > 0)
     emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
   if (isSub) {
-    const TargetInstrDesc &TID = TII.get(ARM::tRSB);
-    AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+    const MCInstrDesc &MCID = TII.get(ARM::tRSB);
+    AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
                    .addReg(DestReg, RegState::Kill));
   }
 }
@@ -377,11 +377,9 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
 static unsigned convertToNonSPOpcode(unsigned Opcode) {
   switch (Opcode) {
   case ARM::tLDRspi:
-  case ARM::tRestore:           // FIXME: Should this opcode be here?
     return ARM::tLDRi;
 
   case ARM::tSTRspi:
-  case ARM::tSpill:             // FIXME: Should this opcode be here?
     return ARM::tSTRi;
   }
 
@@ -396,7 +394,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc dl = MI.getDebugLoc();
   unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = MI.getDesc();
+  const MCInstrDesc &Desc = MI.getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
 
   if (Opcode == ARM::tADDrSPi) {
@@ -419,13 +417,12 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
     unsigned PredReg;
     if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
       // Turn it into a move.
-      MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
+      MI.setDesc(TII.get(ARM::tMOVr));
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
-      // Remove offset and remaining explicit predicate operands.
-      do MI.RemoveOperand(FrameRegIdx+1);
-      while (MI.getNumOperands() > FrameRegIdx+1 &&
-             (!MI.getOperand(FrameRegIdx+1).isReg() ||
-              !MI.getOperand(FrameRegIdx+1).isImm()));
+      // Remove offset and add predicate operands.
+      MI.RemoveOperand(FrameRegIdx+1);
+      MachineInstrBuilder MIB(&MI);
+      AddDefaultPred(MIB);
       return true;
     }
 
@@ -524,7 +521,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
 
     // If this is a thumb spill / restore, we will be using a constpool load to
     // materialize the offset.
-    if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) {
+    if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
       ImmOp.ChangeToImmediate(0);
     } else {
       // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
@@ -567,8 +564,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
   // the function, the offset will be negative. Use R12 instead since that's
   // a call clobbered register that we know won't be used in Thumb1 mode.
   DebugLoc DL;
-  BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
-    addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+  AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+    .addReg(ARM::R12, RegState::Define)
+    .addReg(Reg, RegState::Kill));
 
   // The UseMI is where we would like to restore the register. If there's
   // interference with R12 before then, however, we'll need to restore it
@@ -591,8 +589,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
     }
   }
   // Restore the register from R12
-  BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
-    addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+  AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+    addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
 
   return true;
 }
@@ -653,7 +651,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   assert(Offset && "This code isn't needed if offset already handled!");
 
   unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = MI.getDesc();
+  const MCInstrDesc &Desc = MI.getDesc();
 
   // Remove predicate first.
   int PIdx = MI.findFirstPredOperandIdx();
@@ -664,7 +662,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     // Use the destination register to materialize sp + offset.
     unsigned TmpReg = MI.getOperand(0).getReg();
     bool UseRR = false;
-    if (Opcode == ARM::tRestore) {
+    if (Opcode == ARM::tLDRspi) {
       if (FrameReg == ARM::SP)
         emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
                                  Offset, false, TII, *this);
@@ -687,7 +685,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
       bool UseRR = false;
 
-      if (Opcode == ARM::tSpill) {
+      if (Opcode == ARM::tSTRspi) {
         if (FrameReg == ARM::SP)
           emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
                                    Offset, false, TII, *this);
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 45e693744b80..360ec009e201 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -98,9 +98,6 @@ static bool isCopy(MachineInstr *MI) {
   case ARM::MOVr:
   case ARM::MOVr_TC:
   case ARM::tMOVr:
-  case ARM::tMOVgpr2tgpr:
-  case ARM::tMOVtgpr2gpr:
-  case ARM::tMOVgpr2gpr:
   case ARM::t2MOVr:
     return true;
   }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d169dbb7f197..51b56aaeb008 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -15,7 +15,6 @@
 #include "ARM.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMAddressingModes.h"
-#include "ARMGenInstrInfo.inc"
 #include "ARMMachineFunctionInfo.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -113,18 +112,8 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
     return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
 
-  bool tDest = ARM::tGPRRegClass.contains(DestReg);
-  bool tSrc  = ARM::tGPRRegClass.contains(SrcReg);
-  unsigned Opc = ARM::tMOVgpr2gpr;
-  if (tDest && tSrc)
-    Opc = ARM::tMOVr;
-  else if (tSrc)
-    Opc = ARM::tMOVtgpr2gpr;
-  else if (tDest)
-    Opc = ARM::tMOVgpr2tgpr;
-
-  BuildMI(MBB, I, DL, get(Opc), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+  AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc)));
 }
 
 void Thumb2InstrInfo::
@@ -232,8 +221,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     unsigned Opc = 0;
     if (DestReg == ARM::SP && BaseReg != ARM::SP) {
       // mov sp, rn. Note t2MOVr cannot be used.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg)
-        .addReg(BaseReg).setMIFlags(MIFlags);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+        .addReg(BaseReg).setMIFlags(MIFlags));
       BaseReg = ARM::SP;
       continue;
     }
@@ -252,7 +241,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
       }
 
       // sub rd, sp, so_imm
-      Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
+      Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
       if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
         NumBytes = 0;
       } else {
@@ -396,7 +385,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
   unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = MI.getDesc();
+  const MCInstrDesc &Desc = MI.getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
   bool isSub = false;
 
@@ -410,25 +399,24 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     unsigned PredReg;
     if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
       // Turn it into a move.
-      MI.setDesc(TII.get(ARM::tMOVgpr2gpr));
+      MI.setDesc(TII.get(ARM::tMOVr));
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
       // Remove offset and remaining explicit predicate operands.
       do MI.RemoveOperand(FrameRegIdx+1);
-      while (MI.getNumOperands() > FrameRegIdx+1 &&
-             (!MI.getOperand(FrameRegIdx+1).isReg() ||
-              !MI.getOperand(FrameRegIdx+1).isImm()));
+      while (MI.getNumOperands() > FrameRegIdx+1);
+      MachineInstrBuilder MIB(&MI);
+      AddDefaultPred(MIB);
       return true;
     }
 
-    bool isSP = FrameReg == ARM::SP;
     bool HasCCOut = Opcode != ARM::t2ADDri12;
 
     if (Offset < 0) {
       Offset = -Offset;
       isSub = true;
-      MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
+      MI.setDesc(TII.get(ARM::t2SUBri));
     } else {
-      MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
+      MI.setDesc(TII.get(ARM::t2ADDri));
     }
 
     // Common case: small offset, fits into instruction.
@@ -444,9 +432,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     // Another common case: imm12.
     if (Offset < 4096 &&
         (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
-      unsigned NewOpc = isSP
-        ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
-        : (isSub ? ARM::t2SUBri12   : ARM::t2ADDri12);
+      unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
       MI.setDesc(TII.get(NewOpc));
       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
@@ -579,8 +565,7 @@ void
 Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
                                        MachineInstr *UseMI,
                                        const TargetRegisterInfo &TRI) const {
-  if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
-      SrcMI->getOperand(1).isKill())
+  if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
     return;
 
   unsigned PredReg = 0;
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ce2e9663fb74..c741a6e8a5b7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -57,10 +57,8 @@ namespace {
   static const ReduceEntry ReduceTable[] = {
     // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, PF, S
     { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0 },
-    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,0 },
+    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,1 },
     { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0 },
-    // Note: immediate scale is 4.
-    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 0,1 },
     { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1 },
     { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1 },
     { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0 },
@@ -84,9 +82,7 @@ namespace {
     { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,0 },
     { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,1 },
     // FIXME: Do we need the 16-bit 'S' variant?
-    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0,0 },
-    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0,0 },
-    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0,0 },
+    { ARM::t2MOVr,ARM::tMOVr,     0,             0,   0,    0,   0,  1,0, 0,0 },
     { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
     { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
     { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
@@ -189,8 +185,8 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
   }
 }
 
-static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
-  for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs)
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+  for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
     if (*Regs == ARM::CPSR)
       return true;
   return false;
@@ -291,7 +287,7 @@ static bool VerifyLowRegs(MachineInstr *MI) {
                  Opc == ARM::t2LDMDB     || Opc == ARM::t2LDMIA_UPD ||
                  Opc == ARM::t2LDMDB_UPD);
   bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
-  bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
+  bool isSPOk = isPCOk || isLROk;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg() || MO.isImplicit())
@@ -481,14 +477,54 @@ bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, MachineInstr *CPSRDef) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == ARM::t2ADDri) {
+    // If the source register is SP, try to reduce to tADDrSPi, otherwise
+    // it's a normal reduce.
+    if (MI->getOperand(1).getReg() != ARM::SP) {
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+        return true;
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+    }
+    // Try to reduce to tADDrSPi.
+    unsigned Imm = MI->getOperand(2).getImm();
+    // The immediate must be in range, the destination register must be a low
+    // reg, the predicate must be "always" and the instruction must not set the
+    // condition flags.
+    if (Imm & 3 || Imm > 1020)
+      return false;
+    if (!isARMLowRegister(MI->getOperand(0).getReg()))
+      return false;
+    if (MI->getOperand(3).getImm() != ARMCC::AL)
+      return false;
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.hasOptionalDef() &&
+        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+      return false;
+
+    MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+                                      TII->get(ARM::tADDrSPi))
+      .addOperand(MI->getOperand(0))
+      .addOperand(MI->getOperand(1))
+      .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
+
+    // Transfer MI flags.
+    MIB.setMIFlags(MI->getFlags());
+
+    DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " <<*MIB);
+
+    MBB.erase(MI);
+    ++NumNarrows;
+    return true;
+  }
+
   if (Entry.LowRegs1 && !VerifyLowRegs(MI))
     return false;
 
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (TID.mayLoad() || TID.mayStore())
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.mayLoad() || MCID.mayStore())
     return ReduceLoadStore(MBB, MI, Entry);
 
-  unsigned Opc = MI->getOpcode();
   switch (Opc) {
   default: break;
   case ARM::t2ADDSri:
@@ -531,13 +567,6 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
       return true;
     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
   }
-  case ARM::t2ADDrSPi: {
-    static const ReduceEntry NarrowEntry =
-      { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 };
-    if (MI->getOperand(0).getReg() == ARM::SP)
-      return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef);
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
-  }
   }
   return false;
 }
@@ -576,23 +605,23 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   }
 
   // Check if it's possible / necessary to transfer the predicate.
-  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
+  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   bool SkipPred = false;
   if (Pred != ARMCC::AL) {
-    if (!NewTID.isPredicable())
+    if (!NewMCID.isPredicable())
       // Can't transfer predicate, fail.
       return false;
   } else {
-    SkipPred = !NewTID.isPredicable();
+    SkipPred = !NewMCID.isPredicable();
   }
 
   bool HasCC = false;
   bool CCDead = false;
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (TID.hasOptionalDef()) {
-    unsigned NumOps = TID.getNumOperands();
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.hasOptionalDef()) {
+    unsigned NumOps = MCID.getNumOperands();
     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
     if (HasCC && MI->getOperand(NumOps-1).isDead())
       CCDead = true;
@@ -602,15 +631,15 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
-  if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
       canAddPseudoFlagDep(CPSRDef, MI))
     return false;
 
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
-  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
   MIB.addOperand(MI->getOperand(0));
-  if (NewTID.hasOptionalDef()) {
+  if (NewMCID.hasOptionalDef()) {
     if (HasCC)
       AddDefaultT1CC(MIB, CCDead);
     else
@@ -618,11 +647,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   }
 
   // Transfer the rest of operands.
-  unsigned NumOps = TID.getNumOperands();
+  unsigned NumOps = MCID.getNumOperands();
   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
-    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
       continue;
-    if (SkipPred && TID.OpInfo[i].isPredicate())
+    if (SkipPred && MCID.OpInfo[i].isPredicate())
       continue;
     MIB.addOperand(MI->getOperand(i));
   }
@@ -645,47 +674,44 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
     return false;
 
   unsigned Limit = ~0U;
-  unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
   if (Entry.Imm1Limit)
-    Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;
+    Limit = (1 << Entry.Imm1Limit) - 1;
 
-  const TargetInstrDesc &TID = MI->getDesc();
-  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
-    if (TID.OpInfo[i].isPredicate())
+  const MCInstrDesc &MCID = MI->getDesc();
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+    if (MCID.OpInfo[i].isPredicate())
       continue;
     const MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg()) {
       unsigned Reg = MO.getReg();
       if (!Reg || Reg == ARM::CPSR)
         continue;
-      if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
-        continue;
       if (Entry.LowRegs1 && !isARMLowRegister(Reg))
         return false;
     } else if (MO.isImm() &&
-               !TID.OpInfo[i].isPredicate()) {
-      if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0)
+               !MCID.OpInfo[i].isPredicate()) {
+      if (((unsigned)MO.getImm()) > Limit)
         return false;
     }
   }
 
   // Check if it's possible / necessary to transfer the predicate.
-  const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
+  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   bool SkipPred = false;
   if (Pred != ARMCC::AL) {
-    if (!NewTID.isPredicable())
+    if (!NewMCID.isPredicable())
       // Can't transfer predicate, fail.
       return false;
   } else {
-    SkipPred = !NewTID.isPredicable();
+    SkipPred = !NewMCID.isPredicable();
   }
 
   bool HasCC = false;
   bool CCDead = false;
-  if (TID.hasOptionalDef()) {
-    unsigned NumOps = TID.getNumOperands();
+  if (MCID.hasOptionalDef()) {
+    unsigned NumOps = MCID.getNumOperands();
     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
     if (HasCC && MI->getOperand(NumOps-1).isDead())
       CCDead = true;
@@ -695,15 +721,15 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
 
   // Avoid adding a false dependency on partial flag update by some 16-bit
   // instructions which has the 's' bit set.
-  if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
       canAddPseudoFlagDep(CPSRDef, MI))
     return false;
 
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
-  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+  MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
   MIB.addOperand(MI->getOperand(0));
-  if (NewTID.hasOptionalDef()) {
+  if (NewMCID.hasOptionalDef()) {
     if (HasCC)
       AddDefaultT1CC(MIB, CCDead);
     else
@@ -711,29 +737,25 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   }
 
   // Transfer the rest of operands.
-  unsigned NumOps = TID.getNumOperands();
+  unsigned NumOps = MCID.getNumOperands();
   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
-    if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+    if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
       continue;
-    if ((TID.getOpcode() == ARM::t2RSBSri ||
-         TID.getOpcode() == ARM::t2RSBri) && i == 2)
+    if ((MCID.getOpcode() == ARM::t2RSBSri ||
+         MCID.getOpcode() == ARM::t2RSBri) && i == 2)
       // Skip the zero immediate operand, it's now implicit.
       continue;
-    bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
+    bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
     if (SkipPred && isPred)
         continue;
     const MachineOperand &MO = MI->getOperand(i);
-    if (Scale > 1 && !isPred && MO.isImm())
-      MIB.addImm(MO.getImm() / Scale);
-    else {
-      if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
-        // Skip implicit def of CPSR. Either it's modeled as an optional
-        // def now or it's already an implicit def on the new instruction.
-        continue;
-      MIB.addOperand(MO);
-    }
+    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+      // Skip implicit def of CPSR. Either it's modeled as an optional
+      // def now or it's already an implicit def on the new instruction.
+      continue;
+    MIB.addOperand(MO);
   }
-  if (!TID.isPredicable() && NewTID.isPredicable())
+  if (!MCID.isPredicable() && NewMCID.isPredicable())
     AddDefaultPred(MIB);
 
   // Transfer MI flags.
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
index 2c359dade29b..6ffaf45f4ed1 100644
--- a/lib/Target/Alpha/Alpha.h
+++ b/lib/Target/Alpha/Alpha.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_ALPHA_H
 #define TARGET_ALPHA_H
 
+#include "MCTargetDesc/AlphaMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -37,17 +38,6 @@ namespace llvm {
   FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
   FunctionPass *createAlphaBranchSelectionPass();
 
-  extern Target TheAlphaTarget;
-
 } // end namespace llvm;
 
-// Defines symbolic names for Alpha registers.  This defines a mapping from
-// register name to register number.
-//
-#include "AlphaGenRegisterNames.inc"
-
-// Defines symbolic names for the Alpha instructions.
-//
-#include "AlphaGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 0875cfd1c3c5..de003fb4c65e 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -122,6 +122,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FPOW , MVT::f32, Expand);
   setOperationAction(ISD::FPOW , MVT::f64, Expand);
 
+  setOperationAction(ISD::FMA, MVT::f64, Expand);
+  setOperationAction(ISD::FMA, MVT::f32, Expand);
+
   setOperationAction(ISD::SETCC, MVT::f32, Promote);
 
   setOperationAction(ISD::BITCAST, MVT::f32, Promote);
@@ -824,41 +827,24 @@ AlphaTargetLowering::getSingleConstraintMatchWeight(
   return weight;
 }
 
-std::vector<unsigned> AlphaTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const {
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
+std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
-    default: break;  // Unknown constriant letter
-    case 'f':
-      return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
-                                   Alpha::F3 , Alpha::F4 , Alpha::F5 ,
-                                   Alpha::F6 , Alpha::F7 , Alpha::F8 ,
-                                   Alpha::F9 , Alpha::F10, Alpha::F11,
-                                   Alpha::F12, Alpha::F13, Alpha::F14,
-                                   Alpha::F15, Alpha::F16, Alpha::F17,
-                                   Alpha::F18, Alpha::F19, Alpha::F20,
-                                   Alpha::F21, Alpha::F22, Alpha::F23,
-                                   Alpha::F24, Alpha::F25, Alpha::F26,
-                                   Alpha::F27, Alpha::F28, Alpha::F29,
-                                   Alpha::F30, Alpha::F31, 0);
     case 'r':
-      return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
-                                   Alpha::R3 , Alpha::R4 , Alpha::R5 ,
-                                   Alpha::R6 , Alpha::R7 , Alpha::R8 ,
-                                   Alpha::R9 , Alpha::R10, Alpha::R11,
-                                   Alpha::R12, Alpha::R13, Alpha::R14,
-                                   Alpha::R15, Alpha::R16, Alpha::R17,
-                                   Alpha::R18, Alpha::R19, Alpha::R20,
-                                   Alpha::R21, Alpha::R22, Alpha::R23,
-                                   Alpha::R24, Alpha::R25, Alpha::R26,
-                                   Alpha::R27, Alpha::R28, Alpha::R29,
-                                   Alpha::R30, Alpha::R31, 0);
+      return std::make_pair(0U, Alpha::GPRCRegisterClass);
+    case 'f':
+      return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
+        std::make_pair(0U, Alpha::F4RCRegisterClass);
     }
   }
-
-  return std::vector<unsigned>();
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
+
 //===----------------------------------------------------------------------===//
 //  Other Lowering Code
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index d38c3145b19f..13383f4430f9 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -94,9 +94,9 @@ namespace llvm {
     ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::vector<unsigned>
-      getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                        EVT VT) const;
+    std::pair<unsigned, const TargetRegisterClass*>
+    getRegForInlineAsmConstraint(const std::string &Constraint,
+                                 EVT VT) const;
 
     MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 5a2f5610fdb4..4dcec8f31750 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -14,17 +14,21 @@
 #include "Alpha.h"
 #include "AlphaInstrInfo.h"
 #include "AlphaMachineFunctionInfo.h"
-#include "AlphaGenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AlphaGenInstrInfo.inc"
 using namespace llvm;
 
 AlphaInstrInfo::AlphaInstrInfo()
-  : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)),
-    RI(*this) { }
+  : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+    RI(*this) {
+}
 
 
 unsigned 
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index ee6077a4a01a..337a85cdf22d 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -17,9 +17,12 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "AlphaRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "AlphaGenInstrInfo.inc"
+
 namespace llvm {
 
-class AlphaInstrInfo : public TargetInstrInfoImpl {
+class AlphaInstrInfo : public AlphaGenInstrInfo {
   const AlphaRegisterInfo RI;
 public:
   AlphaInstrInfo();
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index d6c3809960aa..df8f157266e1 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -33,10 +33,14 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "AlphaGenRegisterInfo.inc"
+
 using namespace llvm;
 
 AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
-  : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+  : AlphaGenRegisterInfo(),
     TII(tii) {
 }
 
@@ -204,10 +208,8 @@ int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const {
   return -1;
 }
 
-#include "AlphaGenRegisterInfo.inc"
-
 std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
 {
-  std::string s(RegisterDescriptors[reg].Name);
+  std::string s(AlphaRegDesc[reg].Name);
   return s;
 }
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index ffe6cf19e210..1072bf73f199 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -15,7 +15,9 @@
 #define ALPHAREGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "AlphaGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "AlphaGenRegisterInfo.inc"
 
 namespace llvm {
 
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
index d644f05f91ae..32120d750413 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.td
+++ b/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -110,10 +110,10 @@ def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
   // $28 is undefined after any and all calls
 
 /// Register classes
-def GPRC : RegisterClass<"Alpha", [i64], 64,
+def GPRC : RegisterClass<"Alpha", [i64], 64, (add
      // Volatile
-     [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
-      R23, R24, R25, R28, 
+     R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+     R23, R24, R25, R28,
      //Special meaning, but volatile
      R27, //procedure address
      R26, //return address
@@ -121,18 +121,13 @@ def GPRC : RegisterClass<"Alpha", [i64], 64,
      // Non-volatile
      R9, R10, R11, R12, R13, R14,
 // Don't allocate 15, 30, 31
-     R15, R30, R31 ]>; //zero
+     R15, R30, R31)>; //zero
 
-def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1, 
+def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
         F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
         F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
         // Saved:
         F2, F3, F4, F5, F6, F7, F8, F9,
-        F31 ]>; //zero
+        F31)>; //zero
 
-def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1, 
-        F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
-        F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
-        // Saved:
-        F2, F3, F4, F5, F6, F7, F8, F9,
-        F31 ]>; //zero
+def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
index bda7104ab926..624a5e2ebd09 100644
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -7,19 +7,30 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the Alpha specific subclass of TargetSubtarget.
+// This file implements the Alpha specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "AlphaSubtarget.h"
 #include "Alpha.h"
-#include "AlphaGenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AlphaGenSubtargetInfo.inc"
+
 using namespace llvm;
 
-AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &FS)
-  : HasCT(false) {
-  std::string CPU = "generic";
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
+                               const std::string &FS)
+  : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
+  std::string CPUName = CPU;
+  if (CPUName.empty())
+    CPUName = "generic";
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUName);
 }
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
index f0eb93c6cba2..70b311683f8b 100644
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -7,21 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the Alpha specific subclass of TargetSubtarget.
+// This file declares the Alpha specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef ALPHASUBTARGET_H
 #define ALPHASUBTARGET_H
 
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "AlphaGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-class AlphaSubtarget : public TargetSubtarget {
+class AlphaSubtarget : public AlphaGenSubtargetInfo {
 protected:
 
   bool HasCT;
@@ -32,12 +35,12 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  AlphaSubtarget(const std::string &TT, const std::string &FS);
+  AlphaSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS);
   
   /// ParseSubtargetFeatures - Parses features string setting specified 
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool hasCT() const { return HasCT; }
 };
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index b53533b8ebcb..3b65d41be892 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Alpha.h"
-#include "AlphaMCAsmInfo.h"
 #include "AlphaTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/FormattedStream.h"
@@ -21,15 +20,15 @@ using namespace llvm;
 extern "C" void LLVMInitializeAlphaTarget() { 
   // Register the target.
   RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
-  RegisterAsmInfo<AlphaMCAsmInfo> Y(TheAlphaTarget);
 }
 
 AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
+                                       const std::string &CPU,
                                        const std::string &FS)
-  : LLVMTargetMachine(T, TT),
+  : LLVMTargetMachine(T, TT, CPU, FS),
     DataLayout("e-f128:128:128-n64"),
     FrameLowering(Subtarget),
-    Subtarget(TT, FS),
+    Subtarget(TT, CPU, FS),
     TLInfo(*this),
     TSInfo(*this) {
   setRelocationModel(Reloc::PIC_);
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 26238fbbc431..cf00e5875d34 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -37,7 +37,7 @@ class AlphaTargetMachine : public LLVMTargetMachine {
 
 public:
   AlphaTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &CPU, const std::string &FS);
 
   virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetFrameLowering  *getFrameLowering() const {
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index 454262ad631d..a6027bbf0b2a 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -1,14 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS Alpha.td)
 
-tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(AlphaGenRegisterNames.inc -gen-register-enums)
-tablegen(AlphaGenRegisterInfo.inc -gen-register-desc)
-tablegen(AlphaGenInstrNames.inc -gen-instr-enums)
-tablegen(AlphaGenInstrInfo.inc -gen-instr-desc)
+tablegen(AlphaGenRegisterInfo.inc -gen-register-info)
+tablegen(AlphaGenInstrInfo.inc -gen-instr-info)
 tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
 tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
 tablegen(AlphaGenCallingConv.inc -gen-callingconv)
-tablegen(AlphaGenSubtarget.inc -gen-subtarget)
+tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(AlphaCodeGen
   AlphaAsmPrinter.cpp
@@ -18,7 +15,6 @@ add_llvm_target(AlphaCodeGen
   AlphaISelLowering.cpp
   AlphaFrameLowering.cpp
   AlphaLLRP.cpp
-  AlphaMCAsmInfo.cpp
   AlphaRegisterInfo.cpp
   AlphaSubtarget.cpp
   AlphaTargetMachine.cpp
@@ -26,3 +22,4 @@ add_llvm_target(AlphaCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
index a35e8846e072..a35e8846e072 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
index 837844bd29a9..837844bd29a9 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.h
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
new file mode 100644
index 000000000000..562052b6df67
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCTargetDesc.h"
+#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AlphaGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+/// Allocate an MCInstrInfo and fill it in via InitAlphaMCInstrInfo.
+static MCInstrInfo *createAlphaMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitAlphaMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+}
+
+static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                   StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+                                          createAlphaMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeAlphaMCAsmInfo() {
+  RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+}
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
new file mode 100644
index 000000000000..b0619e6cb011
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMCTARGETDESC_H
+#define ALPHAMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheAlphaTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Alpha registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AlphaGenRegisterInfo.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AlphaGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ad0dd26aafb1
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMAlphaDesc
+  AlphaMCTargetDesc.cpp
+  AlphaMCAsmInfo.cpp
+  )
diff --git a/lib/Target/Alpha/MCTargetDesc/Makefile b/lib/Target/Alpha/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..d55175fa69dc
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Alpha/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
index 9564be680e51..f48847a0627d 100644
--- a/lib/Target/Alpha/Makefile
+++ b/lib/Target/Alpha/Makefile
@@ -12,12 +12,10 @@ LIBRARYNAME = LLVMAlphaCodeGen
 TARGET = Alpha
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
-                AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
-                AlphaGenInstrInfo.inc \
+BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \
                 AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
-                AlphaGenCallingConv.inc AlphaGenSubtarget.inc
+                AlphaGenCallingConv.inc AlphaGenSubtargetInfo.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h
index ec1fa8689ded..a00ff4cc3275 100644
--- a/lib/Target/Blackfin/Blackfin.h
+++ b/lib/Target/Blackfin/Blackfin.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_BLACKFIN_H
 #define TARGET_BLACKFIN_H
 
+#include "MCTargetDesc/BlackfinMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -24,15 +25,7 @@ namespace llvm {
 
   FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
                                       CodeGenOpt::Level OptLevel);
-  extern Target TheBlackfinTarget;
 
 } // end namespace llvm
 
-// Defines symbolic names for Blackfin registers.  This defines a mapping from
-// register name to register number.
-#include "BlackfinGenRegisterNames.inc"
-
-// Defines symbolic names for the Blackfin instructions.
-#include "BlackfinGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 42659aed5d71..215ca43ea338 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -146,21 +146,21 @@ void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
        NI != DAG.allnodes_end(); ++NI) {
     if (NI->use_empty() || !NI->isMachineOpcode())
       continue;
-    const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode());
+    const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
     for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
       if (!UI->isMachineOpcode())
         continue;
 
-      if (UI.getUse().getResNo() >= DefTID.getNumDefs())
+      if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
         continue;
       const TargetRegisterClass *DefRC =
-        DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI);
+        TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
 
-      const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode());
-      if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands())
+      const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
+      if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
         continue;
       const TargetRegisterClass *UseRC =
-        UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI);
+        TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
       if (!DefRC || !UseRC)
         continue;
       // We cannot copy CC <-> !(CC/D)
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 588d9bded87d..d5728324de87 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -621,39 +621,21 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
   case 'w': return Pair(0U, ALLRegisterClass);
   case 'Z': return Pair(P3, PRegisterClass);
   case 'Y': return Pair(P1, PRegisterClass);
+  case 'z': return Pair(0U, zConsRegisterClass);
+  case 'D': return Pair(0U, DConsRegisterClass);
+  case 'W': return Pair(0U, WConsRegisterClass);
+  case 'c': return Pair(0U, cConsRegisterClass);
+  case 't': return Pair(0U, tConsRegisterClass);
+  case 'u': return Pair(0U, uConsRegisterClass);
+  case 'k': return Pair(0U, kConsRegisterClass);
+  case 'y': return Pair(0U, yConsRegisterClass);
   }
 
   // Not implemented: q0-q7, qA. Use {R2} etc instead.
-  // Constraints z, D, W, c, t, u, k, and y use non-existing classes, defer to
-  // getRegClassForInlineAsmConstraint()
 
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-std::vector<unsigned> BlackfinTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
-  using namespace BF;
-
-  if (Constraint.size() != 1)
-    return std::vector<unsigned>();
-
-  switch (Constraint[0]) {
-  case 'z': return make_vector<unsigned>(P0, P1, P2, 0);
-  case 'D': return make_vector<unsigned>(R0, R2, R4, R6, 0);
-  case 'W': return make_vector<unsigned>(R1, R3, R5, R7, 0);
-  case 'c': return make_vector<unsigned>(I0, I1, I2, I3,
-                                         B0, B1, B2, B3,
-                                         L0, L1, L2, L3, 0);
-  case 't': return make_vector<unsigned>(LT0, LT1, 0);
-  case 'u': return make_vector<unsigned>(LB0, LB1, 0);
-  case 'k': return make_vector<unsigned>(LC0, LC1, 0);
-  case 'y': return make_vector<unsigned>(RETS, RETN, RETI, RETX, RETE,
-                                         ASTAT, SEQSTAT, USP, 0);
-  }
-
-  return std::vector<unsigned>();
-}
-
 bool BlackfinTargetLowering::
 isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The Blackfin target isn't yet aware of offsets.
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 9a54557ad526..b65775b9285d 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -48,9 +48,6 @@ namespace llvm {
 
     std::pair<unsigned, const TargetRegisterClass*>
     getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                      EVT VT) const;
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
     const char *getTargetNodeName(unsigned Opcode) const;
 
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index 598cf2a68c6b..d190ae7984b2 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -14,17 +14,20 @@
 #include "BlackfinInstrInfo.h"
 #include "BlackfinSubtarget.h"
 #include "Blackfin.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
 #include "BlackfinGenInstrInfo.inc"
 
 using namespace llvm;
 
 BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
-  : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts)),
+  : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
     RI(ST, *this),
     Subtarget(ST) {}
 
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index fdc1029da588..d22ddf0d7313 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -17,9 +17,12 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "BlackfinRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "BlackfinGenInstrInfo.inc"
+
 namespace llvm {
 
-  class BlackfinInstrInfo : public TargetInstrInfoImpl {
+  class BlackfinInstrInfo : public BlackfinGenInstrInfo {
     const BlackfinRegisterInfo RI;
     const BlackfinSubtarget& Subtarget;
   public:
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index 34a8d3809ea2..ae8ee9e2a1a2 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -83,7 +83,7 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
 
 static const FunctionType *getType(LLVMContext &Context, unsigned id) {
   const Type *ResultTy = NULL;
-  std::vector<const Type*> ArgTys;
+  std::vector<Type*> ArgTys;
   bool IsVarArg = false;
   
 #define GET_INTRINSIC_GENERATOR
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 6ca460ef803e..3a7c104ee055 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -29,13 +29,15 @@
 #include "llvm/Type.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
 using namespace llvm;
 
 BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
                                            const TargetInstrInfo &tii)
-  : BlackfinGenRegisterInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
-    Subtarget(st),
-    TII(tii) {}
+  : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {}
 
 const unsigned*
 BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -356,6 +358,3 @@ int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum,
   llvm_unreachable("What is the dwarf register number");
   return -1;
 }
-
-#include "BlackfinGenRegisterInfo.inc"
-
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 375d277216c2..86f45c17c625 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -16,7 +16,9 @@
 #define BLACKFINREGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "BlackfinGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "BlackfinGenRegisterInfo.inc"
 
 namespace llvm {
 
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index d8fd302b513e..1c42205eb780 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -195,108 +195,83 @@ def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
 def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
 
 // Register classes.
-def D16 : RegisterClass<"BF", [i16], 16,
-    [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
-     R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L]>;
+def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
 
-def D16L : RegisterClass<"BF", [i16], 16,
-    [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L]>;
+def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
 
-def D16H : RegisterClass<"BF", [i16], 16,
-    [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H]>;
-
-def P16 : RegisterClass<"BF", [i16], 16,
-    [P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
-     P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>;
+def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
 
 def P16L : RegisterClass<"BF", [i16], 16,
-    [P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>;
+                         (add (sequence "P%uL", 0, 5), SPL, FPL)>;
 
 def P16H : RegisterClass<"BF", [i16], 16,
-    [P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>;
+                         (add (sequence "P%uH", 0, 5), SPH, FPH)>;
+
+def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
 
-def DP16 : RegisterClass<"BF", [i16], 16,
-    [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
-     R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L,
-     P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
-     P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>;
+def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
 
-def DP16L : RegisterClass<"BF", [i16], 16,
-    [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
-     P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>;
+def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
 
-def DP16H : RegisterClass<"BF", [i16], 16,
-    [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
-     P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>;
+def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
 
 def GR16 : RegisterClass<"BF", [i16], 16,
-    [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
-     R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L,
-     P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
-     P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL,
+    (add DP16,
      I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
      M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
      B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
-     L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>;
+     L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
 
-def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
   let SubRegClasses = [(D16L lo16), (D16H hi16)];
 }
 
-def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
+def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
   let SubRegClasses = [(P16L lo16), (P16H hi16)];
 }
 
-def I : RegisterClass<"BF", [i32], 32, [I0, I1, I2, I3]>;
-def M : RegisterClass<"BF", [i32], 32, [M0, M1, M2, M3]>;
-def B : RegisterClass<"BF", [i32], 32, [B0, B1, B2, B3]>;
-def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>;
-
-def DP : RegisterClass<"BF", [i32], 32,
-    [R0, R1, R2, R3, R4, R5, R6, R7,
-     P0, P1, P2, P3, P4, P5, FP, SP]> {
+def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
   let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
 }
 
-def GR : RegisterClass<"BF", [i32], 32,
-    [R0, R1, R2, R3, R4, R5, R6, R7,
-     P0, P1, P2, P3, P4, P5,
-     I0, I1, I2, I3, M0, M1, M2, M3,
-     B0, B1, B2, B3, L0, L1, L2, L3,
-     FP, SP]>;
+def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
+def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
+def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
+def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
+
+def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
 
 def ALL : RegisterClass<"BF", [i32], 32,
-    [R0, R1, R2, R3, R4, R5, R6, R7,
-     P0, P1, P2, P3, P4, P5,
-     I0, I1, I2, I3, M0, M1, M2, M3,
-     B0, B1, B2, B3, L0, L1, L2, L3,
-     FP, SP,
+    (add GR,
      A0X, A0W, A1X, A1W, ASTAT, RETS,
      LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
-     USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT]>;
+     USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
 
-def PI : RegisterClass<"BF", [i32], 32,
-    [P0, P1, P2, P3, P4, P5, I0, I1, I2, I3, FP, SP]>;
+def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
 
 // We are going to pretend that CC and !CC are 32-bit registers, even though
 // they only can hold 1 bit.
 let CopyCost = -1, Size = 8 in {
-def JustCC  : RegisterClass<"BF", [i32], 8, [CC]>;
-def NotCC   : RegisterClass<"BF", [i32], 8, [NCC]>;
-def AnyCC   : RegisterClass<"BF", [i32], 8, [CC, NCC]> {
-  let MethodProtos = [{
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    AnyCCClass::iterator
-    AnyCCClass::allocation_order_end(const MachineFunction &MF) const {
-      return allocation_order_begin(MF)+1;
-    }
-  }];
-}
+def JustCC  : RegisterClass<"BF", [i32], 8, (add CC)>;
+def NotCC   : RegisterClass<"BF", [i32], 8, (add NCC)>;
+def AnyCC   : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
 def StatBit : RegisterClass<"BF", [i1], 8,
-    [AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]>;
+    (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
 }
 
 // Should be i40, but that isn't defined. It is not a legal type yet anyway.
-def Accu : RegisterClass<"BF", [i64], 64, [A0, A1]>;
+def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
+
+// Register classes to match inline asm constraints.
+def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
+def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
+def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
+def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
+                                                B0, B1, B2, B3,
+                                                L0, L1, L2, L3)>;
+def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
+def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
+def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
+def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
+                                                RETE, ASTAT, SEQSTAT,
+                                                USP)>;
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp
index e104c5245a9e..ec919cdf0b90 100644
--- a/lib/Target/Blackfin/BlackfinSubtarget.cpp
+++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -7,18 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the blackfin specific subclass of TargetSubtarget.
+// This file implements the blackfin specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "BlackfinSubtarget.h"
-#include "BlackfinGenSubtarget.inc"
+#include "Blackfin.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "BlackfinGenSubtargetInfo.inc"
 
 using namespace llvm;
 
 BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
+                                     const std::string &CPU,
                                      const std::string &FS)
-  : sdram(false),
+  : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
     icplb(false),
     wa_mi_shift(false),
     wa_csync(false),
@@ -30,7 +36,9 @@ BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
     wa_killed_mmr(false),
     wa_rets(false)
 {
-  std::string CPU = "generic";
+  std::string CPUName = CPU;
+  if (CPUName.empty())
+    CPUName = "generic";
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);
 }
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h
index d667fe26519b..1a01a81116d6 100644
--- a/lib/Target/Blackfin/BlackfinSubtarget.h
+++ b/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -7,19 +7,23 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the BLACKFIN specific subclass of TargetSubtarget.
+// This file declares the BLACKFIN specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef BLACKFIN_SUBTARGET_H
 #define BLACKFIN_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "BlackfinGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-  class BlackfinSubtarget : public TargetSubtarget {
+  class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
     bool sdram;
     bool icplb;
     bool wa_mi_shift;
@@ -32,12 +36,12 @@ namespace llvm {
     bool wa_killed_mmr;
     bool wa_rets;
   public:
-    BlackfinSubtarget(const std::string &TT, const std::string &FS);
+    BlackfinSubtarget(const std::string &TT, const std::string &CPU,
+                      const std::string &FS);
 
     /// ParseSubtargetFeatures - Parses features string setting specified
     /// subtarget options.  Definition of function is auto generated by tblgen.
-    std::string ParseSubtargetFeatures(const std::string &FS,
-                                       const std::string &CPU);
+    void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   };
 
 } // end namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index e11920f568a2..a1c9f1c05e0d 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -12,7 +12,6 @@
 
 #include "BlackfinTargetMachine.h"
 #include "Blackfin.h"
-#include "BlackfinMCAsmInfo.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
 
@@ -20,16 +19,15 @@ using namespace llvm;
 
 extern "C" void LLVMInitializeBlackfinTarget() {
   RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
-  RegisterAsmInfo<BlackfinMCAsmInfo> Y(TheBlackfinTarget);
-
 }
 
 BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
                                              const std::string &TT,
+                                             const std::string &CPU,
                                              const std::string &FS)
-  : LLVMTargetMachine(T, TT),
+  : LLVMTargetMachine(T, TT, CPU, FS),
     DataLayout("e-p:32:32-i64:32-f64:32-n32"),
-    Subtarget(TT, FS),
+    Subtarget(TT, CPU, FS),
     TLInfo(*this),
     TSInfo(*this),
     InstrInfo(Subtarget),
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index 29b2b177fc3c..bd7dc84f04ae 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -36,7 +36,7 @@ namespace llvm {
     BlackfinIntrinsicInfo IntrinsicInfo;
   public:
     BlackfinTargetMachine(const Target &T, const std::string &TT,
-                          const std::string &FS);
+                          const std::string &CPU, const std::string &FS);
 
     virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
     virtual const TargetFrameLowering *getFrameLowering() const {
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
index a47299ff1611..d3f33a987e69 100644
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -1,13 +1,10 @@
 set(LLVM_TARGET_DEFINITIONS Blackfin.td)
 
-tablegen(BlackfinGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(BlackfinGenRegisterNames.inc -gen-register-enums)
-tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc)
-tablegen(BlackfinGenInstrNames.inc -gen-instr-enums)
-tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc)
+tablegen(BlackfinGenRegisterInfo.inc -gen-register-info)
+tablegen(BlackfinGenInstrInfo.inc -gen-instr-info)
 tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
 tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
-tablegen(BlackfinGenSubtarget.inc -gen-subtarget)
+tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget)
 tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
 tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
 
@@ -18,7 +15,6 @@ add_llvm_target(BlackfinCodeGen
   BlackfinISelDAGToDAG.cpp
   BlackfinISelLowering.cpp
   BlackfinFrameLowering.cpp
-  BlackfinMCAsmInfo.cpp
   BlackfinRegisterInfo.cpp
   BlackfinSubtarget.cpp
   BlackfinTargetMachine.cpp
@@ -26,3 +22,4 @@ add_llvm_target(BlackfinCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
index 5b9d4a29794e..5b9d4a29794e 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
index c372aa247e04..c372aa247e04 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
new file mode 100644
index 000000000000..0fa1471ae3e7
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCTargetDesc.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "BlackfinGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createBlackfinMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitBlackfinMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+                                      createBlackfinMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
+                                                      StringRef CPU,
+                                                      StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+                                          createBlackfinMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeBlackfinMCAsmInfo() {
+  RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+}
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
new file mode 100644
index 000000000000..5bffe94fc582
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINMCTARGETDESC_H
+#define BLACKFINMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheBlackfinTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Blackfin registers.  This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "BlackfinGenRegisterInfo.inc"
+
+// Defines symbolic names for the Blackfin instructions.
+#define GET_INSTRINFO_ENUM
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "BlackfinGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..8cd924f9236f
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMBlackfinDesc
+  BlackfinMCTargetDesc.cpp
+  BlackfinMCAsmInfo.cpp
+  )
diff --git a/lib/Target/Blackfin/MCTargetDesc/Makefile b/lib/Target/Blackfin/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..6b26101f4473
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Blackfin/MCTargetDesc/Makefile -----------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
index 5eb8e9a992b9..756ac6bcd8a0 100644
--- a/lib/Target/Blackfin/Makefile
+++ b/lib/Target/Blackfin/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMBlackfinCodeGen
 TARGET = Blackfin
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \
-                BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \
-                BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \
-                BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \
+BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \
+		BlackfinGenAsmWriter.inc \
+                BlackfinGenDAGISel.inc BlackfinGenSubtargetInfo.inc \
 		BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index fde2e29e80c6..415beb1dd1cd 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -20,7 +20,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/InlineAsm.h"
@@ -37,6 +36,8 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -61,6 +62,12 @@ extern "C" void LLVMInitializeCBackendTarget() {
   RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
 }
 
+extern "C" void LLVMInitializeCBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCInstrInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {}
+
 namespace {
   class CBEMCAsmInfo : public MCAsmInfo {
   public:
@@ -69,29 +76,6 @@ namespace {
       PrivateGlobalPrefix = "";
     }
   };
-  /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
-  /// any unnamed structure types that are used by the program, and merges
-  /// external functions with the same name.
-  ///
-  class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
-  public:
-    static char ID;
-    CBackendNameAllUsedStructsAndMergeFunctions()
-        : ModulePass(ID) {
-          initializeFindUsedTypesPass(*PassRegistry::getPassRegistry());
-        }
-    void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<FindUsedTypes>();
-    }
-
-    virtual const char *getPassName() const {
-      return "C backend type canonicalizer";
-    }
-
-    virtual bool runOnModule(Module &M);
-  };
-
-  char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0;
 
   /// CWriter - This class is the main chunk of code that converts an LLVM
   /// module to a C translation unit.
@@ -104,7 +88,7 @@ namespace {
     const MCAsmInfo* TAsm;
     MCContext *TCtx;
     const TargetData* TD;
-    std::map<const Type *, std::string> TypeNames;
+    
     std::map<const ConstantFP *, unsigned> FPConstantMap;
     std::set<Function*> intrinsicPrototypesAlreadyGenerated;
     std::set<const Argument*> ByValParams;
@@ -113,6 +97,10 @@ namespace {
     DenseMap<const Value*, unsigned> AnonValueNumbers;
     unsigned NextAnonValueNumber;
 
+    /// UnnamedStructIDs - This contains a unique ID for each struct that is
+    /// either anonymous or has no name.
+    DenseMap<const StructType*, unsigned> UnnamedStructIDs;
+    
   public:
     static char ID;
     explicit CWriter(formatted_raw_ostream &o)
@@ -158,9 +146,9 @@ namespace {
       delete TCtx;
       delete TAsm;
       FPConstantMap.clear();
-      TypeNames.clear();
       ByValParams.clear();
       intrinsicPrototypesAlreadyGenerated.clear();
+      UnnamedStructIDs.clear();
       return false;
     }
 
@@ -177,6 +165,8 @@ namespace {
                                               const AttrListPtr &PAL,
                                               const PointerType *Ty);
 
+    std::string getStructName(const StructType *ST);
+    
     /// writeOperandDeref - Print the result of dereferencing the specified
     /// operand with '*'.  This is equivalent to printing '*' then using
     /// writeOperand, but avoids excess syntax in some cases.
@@ -205,9 +195,12 @@ namespace {
     std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
 
     void lowerIntrinsics(Function &F);
+    /// Prints the definition of the intrinsic function F. Supports the 
+    /// intrinsics which need to be explicitly defined in the CBackend.
+    void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
 
-    void printModuleTypes(const TypeSymbolTable &ST);
-    void printContainedStructs(const Type *Ty, std::set<const Type *> &);
+    void printModuleTypes();
+    void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &);
     void printFloatingPointConstants(Function &F);
     void printFloatingPointConstants(const Constant *C);
     void printFunctionSignature(const Function *F, bool Prototype);
@@ -278,7 +271,7 @@ namespace {
       return AI;
     }
 
-    // isInlineAsm - Check if the instruction is a call to an inline asm chunk
+    // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
     static bool isInlineAsm(const Instruction& I) {
       if (const CallInst *CI = dyn_cast<CallInst>(&I))
         return isa<InlineAsm>(CI->getCalledValue());
@@ -351,6 +344,7 @@ namespace {
 char CWriter::ID = 0;
 
 
+
 static std::string CBEMangle(const std::string &S) {
   std::string Result;
 
@@ -366,90 +360,14 @@ static std::string CBEMangle(const std::string &S) {
   return Result;
 }
 
-
-/// This method inserts names for any unnamed structure types that are used by
-/// the program, and removes names from structure types that are not used by the
-/// program.
-///
-bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
-  // Get a set of types that are used by the program...
-  SetVector<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes();
-
-  // Loop over the module symbol table, removing types from UT that are
-  // already named, and removing names for types that are not used.
-  //
-  TypeSymbolTable &TST = M.getTypeSymbolTable();
-  for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
-       TI != TE; ) {
-    TypeSymbolTable::iterator I = TI++;
-
-    // If this isn't a struct or array type, remove it from our set of types
-    // to name. This simplifies emission later.
-    if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
-        !I->second->isArrayTy()) {
-      TST.remove(I);
-    } else {
-      // If this is not used, remove it from the symbol table.
-      if (!UT.count(I->second))
-        TST.remove(I);
-      else
-        UT.remove(I->second); // Only keep one name for this type.
-    }
-  }
-
-  // UT now contains types that are not named.  Loop over it, naming
-  // structure types.
-  //
-  bool Changed = false;
-  unsigned RenameCounter = 0;
-  for (SetVector<const Type *>::const_iterator I = UT.begin(), E = UT.end();
-       I != E; ++I)
-    if ((*I)->isStructTy() || (*I)->isArrayTy()) {
-      while (M.addTypeName("unnamed"+utostr(RenameCounter), *I))
-        ++RenameCounter;
-      Changed = true;
-    }
-
-
-  // Loop over all external functions and globals.  If we have two with
-  // identical names, merge them.
-  // FIXME: This code should disappear when we don't allow values with the same
-  // names when they have different types!
-  std::map<std::string, GlobalValue*> ExtSymbols;
-  for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
-    Function *GV = I++;
-    if (GV->isDeclaration() && GV->hasName()) {
-      std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
-        = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
-      if (!X.second) {
-        // Found a conflict, replace this global with the previous one.
-        GlobalValue *OldGV = X.first->second;
-        GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
-        GV->eraseFromParent();
-        Changed = true;
-      }
-    }
-  }
-  // Do the same for globals.
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E;) {
-    GlobalVariable *GV = I++;
-    if (GV->isDeclaration() && GV->hasName()) {
-      std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
-        = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
-      if (!X.second) {
-        // Found a conflict, replace this global with the previous one.
-        GlobalValue *OldGV = X.first->second;
-        GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
-        GV->eraseFromParent();
-        Changed = true;
-      }
-    }
-  }
-
-  return Changed;
+std::string CWriter::getStructName(const StructType *ST) {
+  if (!ST->isAnonymous() && !ST->getName().empty())
+    return CBEMangle("l_"+ST->getName().str());
+  
+  return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
 }
 
+
 /// printStructReturnPointerFunctionType - This is like printType for a struct
 /// return type, except, instead of printing the type as void (*)(Struct*, ...)
 /// print it as "Struct (*)(...)", for struct return functions.
@@ -463,7 +381,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
   bool PrintedType = false;
 
   FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
-  const Type *RetTy = cast<PointerType>(I->get())->getElementType();
+  const Type *RetTy = cast<PointerType>(*I)->getElementType();
   unsigned Idx = 1;
   for (++I, ++Idx; I != E; ++I, ++Idx) {
     if (PrintedType)
@@ -551,12 +469,6 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
     return Out;
   }
 
-  // Check to see if the type is named.
-  if (!IgnoreName || Ty->isOpaqueTy()) {
-    std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
-    if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
-  }
-
   switch (Ty->getTypeID()) {
   case Type::FunctionTyID: {
     const FunctionType *FTy = cast<FunctionType>(Ty);
@@ -591,6 +503,11 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
   }
   case Type::StructTyID: {
     const StructType *STy = cast<StructType>(Ty);
+    
+    // Check to see if the type is named.
+    if (!IgnoreName)
+      return Out << getStructName(STy) << ' ' << NameSoFar;
+    
     Out << NameSoFar + " {\n";
     unsigned Idx = 0;
     for (StructType::element_iterator I = STy->element_begin(),
@@ -631,12 +548,6 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
     return Out << "; }";
   }
 
-  case Type::OpaqueTyID: {
-    std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
-    assert(TypeNames.find(Ty) == TypeNames.end());
-    TypeNames[Ty] = TyName;
-    return Out << TyName << ' ' << NameSoFar;
-  }
   default:
     llvm_unreachable("Unhandled case in getTypeProps!");
   }
@@ -660,7 +571,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
 
   if (isString) {
     Out << '\"';
-    // Keep track of whether the last number was a hexadecimal escape
+    // Keep track of whether the last number was a hexadecimal escape.
     bool LastWasHex = false;
 
     // Do not include the last character, which we know is null
@@ -1751,7 +1662,7 @@ bool CWriter::doInitialization(Module &M) {
 
   std::string E;
   if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
-    TAsm = Match->createAsmInfo(Triple);
+    TAsm = Match->createMCAsmInfo(Triple);
 #endif
   TAsm = new CBEMCAsmInfo();
   TCtx = new MCContext(*TAsm, NULL);
@@ -1777,6 +1688,7 @@ bool CWriter::doInitialization(Module &M) {
   Out << "/* Provide Declarations */\n";
   Out << "#include <stdarg.h>\n";      // Varargs support
   Out << "#include <setjmp.h>\n";      // Unwind support
+  Out << "#include <limits.h>\n";      // With overflow intrinsics support.
   generateCompilerSpecificCode(Out, TD);
 
   // Provide a definition for `bool' if not compiling with a C++ compiler.
@@ -1820,8 +1732,8 @@ bool CWriter::doInitialization(Module &M) {
         << "/* End Module asm statements */\n";
   }
 
-  // Loop over the symbol table, emitting all named constants...
-  printModuleTypes(M.getTypeSymbolTable());
+  // Emit the bitcast helper union and all struct types used by the module.
+  printModuleTypes();
 
   // Global variable declarations...
   if (!M.global_empty()) {
@@ -1855,29 +1767,46 @@ bool CWriter::doInitialization(Module &M) {
   Out << "float fmodf(float, float);\n";
   Out << "long double fmodl(long double, long double);\n";
 
+  // Store the intrinsics which will be declared/defined below.
+  SmallVector<const Function*, 8> intrinsicsToDefine;
+
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     // Don't print declarations for intrinsic functions.
-    if (!I->isIntrinsic() && I->getName() != "setjmp" &&
-        I->getName() != "longjmp" && I->getName() != "_setjmp") {
-      if (I->hasExternalWeakLinkage())
-        Out << "extern ";
-      printFunctionSignature(I, true);
-      if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
-        Out << " __ATTRIBUTE_WEAK__";
-      if (I->hasExternalWeakLinkage())
-        Out << " __EXTERNAL_WEAK__";
-      if (StaticCtors.count(I))
-        Out << " __ATTRIBUTE_CTOR__";
-      if (StaticDtors.count(I))
-        Out << " __ATTRIBUTE_DTOR__";
-      if (I->hasHiddenVisibility())
-        Out << " __HIDDEN__";
-
-      if (I->hasName() && I->getName()[0] == 1)
-        Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+    // Store the used intrinsics, which need to be explicitly defined.
+    if (I->isIntrinsic()) {
+      switch (I->getIntrinsicID()) {
+        default:
+          break;
+        case Intrinsic::uadd_with_overflow:
+        case Intrinsic::sadd_with_overflow:
+          intrinsicsToDefine.push_back(I);
+          break;
+      }
+      continue;
+    }
+
+    if (I->getName() == "setjmp" ||
+        I->getName() == "longjmp" || I->getName() == "_setjmp")
+      continue;
+
+    if (I->hasExternalWeakLinkage())
+      Out << "extern ";
+    printFunctionSignature(I, true);
+    if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+      Out << " __ATTRIBUTE_WEAK__";
+    if (I->hasExternalWeakLinkage())
+      Out << " __EXTERNAL_WEAK__";
+    if (StaticCtors.count(I))
+      Out << " __ATTRIBUTE_CTOR__";
+    if (StaticDtors.count(I))
+      Out << " __ATTRIBUTE_DTOR__";
+    if (I->hasHiddenVisibility())
+      Out << " __HIDDEN__";
+
+    if (I->hasName() && I->getName()[0] == 1)
+      Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
 
-      Out << ";\n";
-    }
+    Out << ";\n";
   }
 
   // Output the global variable declarations
@@ -2012,6 +1941,14 @@ bool CWriter::doInitialization(Module &M) {
   Out << "return X <= Y ; }\n";
   Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
   Out << "return X >= Y ; }\n";
+
+  // Emit definitions of the intrinsics.
+  for (SmallVector<const Function*, 8>::const_iterator
+       I = intrinsicsToDefine.begin(),
+       E = intrinsicsToDefine.end(); I != E; ++I) {
+    printIntrinsicDefinition(**I, Out);
+  }
+
   return false;
 }
 
@@ -2085,11 +2022,10 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
 }
 
 
-
 /// printSymbolTable - Run through symbol table looking for type names.  If a
 /// type name is found, emit its declaration...
 ///
-void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
+void CWriter::printModuleTypes() {
   Out << "/* Helper union for bitcasts */\n";
   Out << "typedef union {\n";
   Out << "  unsigned int Int32;\n";
@@ -2098,46 +2034,42 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
   Out << "  double Double;\n";
   Out << "} llvmBitCastUnion;\n";
 
-  // We are only interested in the type plane of the symbol table.
-  TypeSymbolTable::const_iterator I   = TST.begin();
-  TypeSymbolTable::const_iterator End = TST.end();
+  // Get all of the struct types used in the module.
+  std::vector<StructType*> StructTypes;
+  TheModule->findUsedStructTypes(StructTypes);
 
-  // If there are no type names, exit early.
-  if (I == End) return;
+  if (StructTypes.empty()) return;
 
-  // Print out forward declarations for structure types before anything else!
   Out << "/* Structure forward decls */\n";
-  for (; I != End; ++I) {
-    std::string Name = "struct " + CBEMangle("l_"+I->first);
-    Out << Name << ";\n";
-    TypeNames.insert(std::make_pair(I->second, Name));
-  }
 
-  Out << '\n';
+  unsigned NextTypeID = 0;
+  
+  // If any of them are missing names, add a unique ID to UnnamedStructIDs.
+  // Print out forward declarations for structure types.
+  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+    StructType *ST = StructTypes[i];
 
-  // Now we can print out typedefs.  Above, we guaranteed that this can only be
-  // for struct or opaque types.
-  Out << "/* Typedefs */\n";
-  for (I = TST.begin(); I != End; ++I) {
-    std::string Name = CBEMangle("l_"+I->first);
-    Out << "typedef ";
-    printType(Out, I->second, false, Name);
-    Out << ";\n";
+    if (ST->isAnonymous() || ST->getName().empty())
+      UnnamedStructIDs[ST] = NextTypeID++;
+
+    std::string Name = getStructName(ST);
+
+    Out << "typedef struct " << Name << ' ' << Name << ";\n";
   }
 
   Out << '\n';
 
-  // Keep track of which structures have been printed so far...
-  std::set<const Type *> StructPrinted;
+  // Keep track of which structures have been printed so far.
+  SmallPtrSet<const Type *, 16> StructPrinted;
 
   // Loop over all structures then push them into the stack so they are
   // printed in the correct order.
   //
   Out << "/* Structure contents */\n";
-  for (I = TST.begin(); I != End; ++I)
-    if (I->second->isStructTy() || I->second->isArrayTy())
+  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
+    if (StructTypes[i]->isStructTy())
       // Only print out used types!
-      printContainedStructs(I->second, StructPrinted);
+      printContainedStructs(StructTypes[i], StructPrinted);
 }
 
 // Push the struct onto the stack and recursively push all structs
@@ -2146,7 +2078,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
 // TODO:  Make this work properly with vector types
 //
 void CWriter::printContainedStructs(const Type *Ty,
-                                    std::set<const Type*> &StructPrinted) {
+                                SmallPtrSet<const Type *, 16> &StructPrinted) {
   // Don't walk through pointers.
   if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
     return;
@@ -2156,14 +2088,13 @@ void CWriter::printContainedStructs(const Type *Ty,
        E = Ty->subtype_end(); I != E; ++I)
     printContainedStructs(*I, StructPrinted);
 
-  if (Ty->isStructTy() || Ty->isArrayTy()) {
+  if (const StructType *ST = dyn_cast<StructType>(Ty)) {
     // Check to see if we have already printed this struct.
-    if (StructPrinted.insert(Ty).second) {
-      // Print structure type out.
-      std::string Name = TypeNames[Ty];
-      printType(Out, Ty, false, Name, true);
-      Out << ";\n\n";
-    }
+    if (!StructPrinted.insert(Ty)) return;
+    
+    // Print structure type out.
+    printType(Out, ST, false, getStructName(ST), true);
+    Out << ";\n\n";
   }
 }
 
@@ -2786,6 +2717,103 @@ void CWriter::visitSelectInst(SelectInst &I) {
   Out << "))";
 }
 
+// Prints to Out the <limits.h> macro naming the max or min value of an integer
+// type. An unsigned minimum has no macro, so the literal 0 is printed instead.
+static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+                            raw_ostream &Out) {
+  const char* type;
+  const char* sprefix = "";  // Only the signed char macros carry an 'S'.
+  unsigned NumBits = Ty.getBitWidth();
+  if (NumBits <= 8) {
+    type = "CHAR";
+    sprefix = "S";
+  } else if (NumBits <= 16) {
+    type = "SHRT";
+  } else if (NumBits <= 32) {
+    type = "INT";
+  } else if (NumBits <= 64) {
+    type = "LLONG";
+  } else {
+    llvm_unreachable("Bit widths > 64 not implemented yet");
+  }
+  if (isSigned)
+    Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+  else if (isMax)
+    Out << "U" << type << "_MAX";
+  else
+    Out << "0";  // Previously printed e.g. "UINT0", which is not valid C.
+}
+
+#ifndef NDEBUG
+static bool isSupportedIntegerSize(const IntegerType &T) {
+  return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+         T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+#endif
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+  const FunctionType *funT = F.getFunctionType();
+  const Type *retT = F.getReturnType();
+  const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+  assert(isSupportedIntegerSize(*elemT) &&
+         "CBackend does not support arbitrary size integers.");
+  assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+         elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+  switch (F.getIntrinsicID()) {
+  default:
+    llvm_unreachable("Unsupported Intrinsic.");
+  case Intrinsic::uadd_with_overflow:
+    // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+    //   Rty r;
+    //   r.field0 = a + b;
+    //   r.field1 = (r.field0 < a);
+    //   return r;
+    // }
+    Out << "static inline ";
+    printType(Out, retT);
+    Out << GetValueName(&F);
+    Out << "(";
+    printSimpleType(Out, elemT, false);
+    Out << "a,";
+    printSimpleType(Out, elemT, false);
+    Out << "b) {\n  ";
+    printType(Out, retT);
+    Out << "r;\n";
+    Out << "  r.field0 = a + b;\n";
+    Out << "  r.field1 = (r.field0 < a);\n";
+    Out << "  return r;\n}\n";
+    break;
+    
+  case Intrinsic::sadd_with_overflow:            
+    // static inline Rty sadd_ixx(ixx a, ixx b) {
+    //   Rty r;
+    //   r.field1 = (b > 0 && a > XX_MAX - b) ||
+    //              (b < 0 && a < XX_MIN - b);
+    //   r.field0 = r.field1 ? 0 : a + b;
+    //   return r;
+    // }
+    Out << "static ";
+    printType(Out, retT);
+    Out << GetValueName(&F);
+    Out << "(";
+    printSimpleType(Out, elemT, true);
+    Out << "a,";
+    printSimpleType(Out, elemT, true);
+    Out << "b) {\n  ";
+    printType(Out, retT);
+    Out << "r;\n";
+    Out << "  r.field1 = (b > 0 && a > ";
+    printLimitValue(*elemT, true, true, Out);
+    Out << " - b) || (b < 0 && a < ";
+    printLimitValue(*elemT, true, false, Out);
+    Out << " - b);\n";
+    Out << "  r.field0 = r.field1 ? 0 : a + b;\n";
+    Out << "  return r;\n}\n";
+    break;
+  }
+}
 
 void CWriter::lowerIntrinsics(Function &F) {
   // This is used to keep track of intrinsics that get generated to a lowered
@@ -2816,6 +2844,8 @@ void CWriter::lowerIntrinsics(Function &F) {
           case Intrinsic::x86_sse2_cmp_sd:
           case Intrinsic::x86_sse2_cmp_pd:
           case Intrinsic::ppc_altivec_lvsl:
+          case Intrinsic::uadd_with_overflow:
+          case Intrinsic::sadd_with_overflow:
               // We directly implement these intrinsics
             break;
           default:
@@ -3109,6 +3139,14 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
     writeOperand(I.getArgOperand(0));
     Out << ")";
     return true;
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::sadd_with_overflow:
+    Out << GetValueName(I.getCalledFunction()) << "(";
+    writeOperand(I.getArgOperand(0));
+    Out << ", ";
+    writeOperand(I.getArgOperand(1));
+    Out << ")";
+    return true;
   }
 }
 
@@ -3127,7 +3165,7 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
 
   std::string E;
   if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
-    TargetAsm = Match->createAsmInfo(Triple);
+    TargetAsm = Match->createMCAsmInfo(Triple);
   else
     return c.Codes[0];
 
@@ -3520,7 +3558,8 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
   for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
        i != e; ++i) {
     const Type *IndexedTy =
-      ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
+      ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
+                                       ArrayRef<unsigned>(b, i+1));
     if (IndexedTy->isArrayTy())
       Out << ".array[" << *i << "]";
     else
@@ -3541,7 +3580,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
     for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
          i != e; ++i) {
       const Type *IndexedTy =
-        ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
+        ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
+                                         ArrayRef<unsigned>(b, i+1));
       if (IndexedTy->isArrayTy())
         Out << ".array[" << *i << "]";
       else
@@ -3565,7 +3605,6 @@ bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   PM.add(createGCLoweringPass());
   PM.add(createLowerInvokePass());
   PM.add(createCFGSimplificationPass());   // clean up after lower invoke.
-  PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
   PM.add(new CWriter(o));
   PM.add(createGCInfoDeleter());
   return false;
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 6fed1959ff63..e64216be0bdc 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -20,8 +20,9 @@
 namespace llvm {
 
 struct CTargetMachine : public TargetMachine {
-  CTargetMachine(const Target &T, const std::string &TT, const std::string &FS)
-    : TargetMachine(T) {}
+  CTargetMachine(const Target &T, const std::string &TT,
+                 const std::string &CPU, const std::string &FS)
+    : TargetMachine(T, TT, CPU, FS) {}
 
   virtual bool addPassesToEmitFile(PassManagerBase &PM,
                                    formatted_raw_ostream &Out,
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 09b48ce632f2..f982316fc087 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_llvm_library(LLVMTarget
   Mangler.cpp
-  SubtargetFeature.cpp
   Target.cpp
   TargetAsmInfo.cpp
   TargetAsmLexer.cpp
@@ -13,7 +12,7 @@ add_llvm_library(LLVMTarget
   TargetLoweringObjectFile.cpp
   TargetMachine.cpp
   TargetRegisterInfo.cpp
-  TargetSubtarget.cpp
+  TargetSubtargetInfo.cpp
   )
 
 set(LLVM_ENUM_ASM_PRINTERS "")
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index a2a2ef1aa9af..0b94e0cf1193 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -1,14 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS SPU.td)
 
-tablegen(SPUGenInstrNames.inc -gen-instr-enums)
-tablegen(SPUGenRegisterNames.inc -gen-register-enums)
 tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
 tablegen(SPUGenCodeEmitter.inc -gen-emitter)
-tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(SPUGenRegisterInfo.inc -gen-register-desc)
-tablegen(SPUGenInstrInfo.inc -gen-instr-desc)
+tablegen(SPUGenRegisterInfo.inc -gen-register-info)
+tablegen(SPUGenInstrInfo.inc -gen-instr-info)
 tablegen(SPUGenDAGISel.inc -gen-dag-isel)
-tablegen(SPUGenSubtarget.inc -gen-subtarget)
+tablegen(SPUGenSubtargetInfo.inc -gen-subtarget)
 tablegen(SPUGenCallingConv.inc -gen-callingconv)
 
 add_llvm_target(CellSPUCodeGen
@@ -18,7 +15,6 @@ add_llvm_target(CellSPUCodeGen
   SPUISelDAGToDAG.cpp
   SPUISelLowering.cpp
   SPUFrameLowering.cpp
-  SPUMCAsmInfo.cpp
   SPURegisterInfo.cpp
   SPUSubtarget.cpp
   SPUTargetMachine.cpp
@@ -27,3 +23,4 @@ add_llvm_target(CellSPUCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..85fb258eac2c
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMCellSPUDesc
+  SPUMCTargetDesc.cpp
+  SPUMCAsmInfo.cpp
+  )
diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/CellSPU/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..10d9a42239ad
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/CellSPU/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
index 99aaeb006a0b..8c1176a9d028 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -15,6 +15,8 @@
 using namespace llvm;
 
 SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
+  IsLittleEndian = false;
+
   ZeroDirective = "\t.space\t";
   Data64bitsDirective = "\t.quad\t";
   AlignmentIsInBytes = false;
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
index 7f850d347f56..7f850d347f56 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..26c5a4bc7b33
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCTargetDesc.h"
+#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPUMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitSPUMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                 StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitSPUMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+                                          createSPUMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeCellSPUMCAsmInfo() {
+  RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget);
+}
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
new file mode 100644
index 000000000000..c5c037d4de44
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- SPUMCTargetDesc.h - Cell SPU Target Descriptions ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUMCTARGETDESC_H
+#define SPUMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheCellSPUTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Cell registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPU instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
index 77c66be9e857..d7a8247f5702 100644
--- a/lib/Target/CellSPU/Makefile
+++ b/lib/Target/CellSPU/Makefile
@@ -10,12 +10,11 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMCellSPUCodeGen
 TARGET = SPU
-BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
+BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \
 		SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
-		SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \
-		SPUGenInstrInfo.inc SPUGenDAGISel.inc \
-		SPUGenSubtarget.inc SPUGenCallingConv.inc
+		SPUGenDAGISel.inc \
+		SPUGenSubtargetInfo.inc SPUGenCallingConv.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 72f84300b2c3..b51fbc7a5197 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TARGET_IBMCELLSPU_H
 #define LLVM_TARGET_IBMCELLSPU_H
 
+#include "MCTargetDesc/SPUMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -25,11 +26,6 @@ namespace llvm {
   FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
   FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
 
-  extern Target TheCellSPUTarget;
 }
 
-// Defines symbolic names for the SPU instructions.
-//
-#include "SPUGenInstrNames.inc"
-
 #endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 432f4a1b59e2..a3e7e73ae30a 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -13,7 +13,6 @@
 
 #include "SPU.h"
 #include "SPUFrameLowering.h"
-#include "SPURegisterNames.h"
 #include "SPUInstrBuilder.h"
 #include "SPUInstrInfo.h"
 #include "llvm/Function.h"
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9351ffdc0b7f..a297d036f03e 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -16,7 +16,6 @@
 #include "SPUTargetMachine.h"
 #include "SPUHazardRecognizers.h"
 #include "SPUFrameLowering.h"
-#include "SPURegisterNames.h"
 #include "SPUTargetMachine.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index f9b50419e7bd..f0ceee214149 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -10,7 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SPURegisterNames.h"
 #include "SPUISelLowering.h"
 #include "SPUTargetMachine.h"
 #include "SPUFrameLowering.h"
@@ -221,6 +220,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 
+  setOperationAction(ISD::FMA, MVT::f64, Expand);
+  setOperationAction(ISD::FMA, MVT::f32, Expand);
+
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 080434d66789..e67b10c7984d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -11,17 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SPURegisterNames.h"
 #include "SPUInstrInfo.h"
 #include "SPUInstrBuilder.h"
 #include "SPUTargetMachine.h"
-#include "SPUGenInstrInfo.inc"
 #include "SPUHazardRecognizers.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/MC/MCContext.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SPUGenInstrInfo.inc"
 
 using namespace llvm;
 
@@ -51,7 +53,7 @@ namespace {
 }
 
 SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
-  : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])),
+  : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
     TM(tm),
     RI(*TM.getSubtargetImpl(), *this)
 { /* NOP */ }
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index e5e91481419a..bc1ba71f7a45 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -18,9 +18,12 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "SPURegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "SPUGenInstrInfo.inc"
+
 namespace llvm {
   //! Cell SPU instruction information class
-  class SPUInstrInfo : public TargetInstrInfoImpl {
+  class SPUInstrInfo : public SPUGenInstrInfo {
     SPUTargetMachine &TM;
     const SPURegisterInfo RI;
   public:
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 623ae76326bb..19896c0b4be9 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -14,7 +14,6 @@
 #define DEBUG_TYPE "reginfo"
 #include "SPU.h"
 #include "SPURegisterInfo.h"
-#include "SPURegisterNames.h"
 #include "SPUInstrBuilder.h"
 #include "SPUSubtarget.h"
 #include "SPUMachineFunction.h"
@@ -43,6 +42,9 @@
 #include "llvm/ADT/STLExtras.h"
 #include <cstdlib>
 
+#define GET_REGINFO_TARGET_DESC
+#include "SPUGenRegisterInfo.inc"
+
 using namespace llvm;
 
 /// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -185,9 +187,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
 
 SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
                                  const TargetInstrInfo &tii) :
-  SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
-  Subtarget(subtarget),
-  TII(tii)
+  SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii)
 {
 }
 
@@ -371,5 +371,3 @@ SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
   assert( Reg && "Register scavenger failed");
   return Reg;
 }
-
-#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 6ecf0f28dd9f..5e014f8adbfc 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -16,7 +16,9 @@
 #define SPU_REGISTERINFO_H
 
 #include "SPU.h"
-#include "SPUGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "SPUGenRegisterInfo.inc"
 
 namespace llvm {
   class SPUSubtarget;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index cce0c823c935..e16f51ff0e02 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -155,147 +155,29 @@ def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
 
 // The SPU's registers as 128-bit wide entities, and can function as general
 // purpose registers, where the operands are in the "preferred slot":
+// The non-volatile registers are allocated in reverse order, like PPC does it.
 def GPRC : RegisterClass<"SPU", [i128], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+                         (add (sequence "R%u", 0, 79),
+                              (sequence "R%u", 127, 80))>;
 
 // The SPU's registers as 64-bit wide (double word integer) "preferred slot":
-def R64C : RegisterClass<"SPU", [i64], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
 
 // The SPU's registers as 64-bit wide (double word) FP "preferred slot":
-def R64FP : RegisterClass<"SPU", [f64], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
 
 // The SPU's registers as 32-bit wide (word) "preferred slot":
-def R32C : RegisterClass<"SPU", [i32], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
 
 // The SPU's registers as single precision floating point "preferred slot":
-def R32FP : RegisterClass<"SPU", [f32], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
 
 // The SPU's registers as 16-bit wide (halfword) "preferred slot":
-def R16C : RegisterClass<"SPU", [i16], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
 
 // The SPU's registers as 8-bit wide (byte) "preferred slot":
-def R8C : RegisterClass<"SPU", [i8], 128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
 
 // The SPU's registers as vector registers:
-def VECREG : RegisterClass<"SPU",
-                           [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
-                           128,
- [
-   /* volatile register */
-   R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, 
-   R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-   R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
-   R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
-   R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
-   R77, R78, R79,
-   /* non-volatile register: take hint from PPC and allocate in reverse order */
-   R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
-   R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
-   R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
-   R86, R85, R84, R83, R82, R81, R80, 
-   /* environment ptr, SP, LR */ 
-   R2, R1, R0 ]>;
+def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
+                           (add GPRC)>;
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
index 6c3afdf41fdc..e557ed340a28 100644
--- a/lib/Target/CellSPU/SPURegisterNames.h
+++ b/lib/Target/CellSPU/SPURegisterNames.h
@@ -13,6 +13,7 @@
 // Define symbolic names for Cell registers.  This defines a mapping from
 // register name to register number.
 //
-#include "SPUGenRegisterNames.inc"
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
 
 #endif
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 07c8352fba9f..856dc82f786b 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -7,19 +7,25 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the CellSPU-specific subclass of TargetSubtarget.
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "SPUSubtarget.h"
 #include "SPU.h"
-#include "SPUGenSubtarget.inc"
-#include "llvm/ADT/SmallVector.h"
 #include "SPURegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
 
 using namespace llvm;
 
-SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS) :
+  SPUGenSubtargetInfo(TT, CPU, FS),
   StackAlignment(16),
   ProcDirective(SPU::DEFAULT_PROC),
   UseLargeMem(false)
@@ -29,7 +35,10 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
   std::string default_cpu("v0");
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, default_cpu);
+  ParseSubtargetFeatures(default_cpu, FS);
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(default_cpu);
 }
 
 /// SetJITMode - This is called to inform the subtarget info that we are
@@ -40,9 +49,9 @@ void SPUSubtarget::SetJITMode() {
 /// Enable PostRA scheduling for optimization levels -O2 and -O3.
 bool SPUSubtarget::enablePostRAScheduler(
                        CodeGenOpt::Level OptLevel,
-                       TargetSubtarget::AntiDepBreakMode& Mode,
+                       TargetSubtargetInfo::AntiDepBreakMode& Mode,
                        RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
   // CriticalPathsRCs seems to be the set of
   // RegisterClasses that antidep breakings are performed for.
   // Do it for all register classes 
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index d7929302f080..7c4aa1430217 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -7,20 +7,23 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the Cell SPU-specific subclass of TargetSubtarget.
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef CELLSUBTARGET_H
 #define CELLSUBTARGET_H
 
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
 namespace llvm {
   class GlobalValue;
+  class StringRef;
 
   namespace SPU {
     enum {
@@ -29,7 +32,7 @@ namespace llvm {
     };
   }
     
-  class SPUSubtarget : public TargetSubtarget {
+  class SPUSubtarget : public SPUGenSubtargetInfo {
   protected:
     /// stackAlignment - The minimum alignment known to hold of the stack frame
     /// on entry to the function and which must be maintained by every function.
@@ -50,12 +53,12 @@ namespace llvm {
     /// This constructor initializes the data members to match that
     /// of the specified triple.
     ///
-    SPUSubtarget(const std::string &TT, const std::string &FS);
+    SPUSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS);
     
     /// ParseSubtargetFeatures - Parses features string setting specified 
     /// subtarget options.  Definition of function is auto generated by tblgen.
-    std::string ParseSubtargetFeatures(const std::string &FS,
-                                       const std::string &CPU);
+    void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
     /// SetJITMode - This is called to inform the subtarget info that we are
     /// producing code for the JIT.
@@ -86,7 +89,7 @@ namespace llvm {
     }
 
     bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                               TargetSubtarget::AntiDepBreakMode& Mode,
+                               TargetSubtargetInfo::AntiDepBreakMode& Mode,
                                RegClassVector& CriticalPathRCs) const;
   };
 } // End llvm namespace
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3ed73613a31d..3542a2b87e43 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -12,8 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "SPU.h"
-#include "SPURegisterNames.h"
-#include "SPUMCAsmInfo.h"
 #include "SPUTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
@@ -25,7 +23,6 @@ using namespace llvm;
 extern "C" void LLVMInitializeCellSPUTarget() { 
   // Register the target.
   RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
-  RegisterAsmInfo<SPULinuxMCAsmInfo> Y(TheCellSPUTarget);
 }
 
 const std::pair<unsigned, int> *
@@ -35,9 +32,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
 }
 
 SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
-                                   const std::string &FS)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS),
+                                   const std::string &CPU,const std::string &FS)
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS),
     DataLayout(Subtarget.getTargetDataString()),
     InstrInfo(*this),
     FrameLowering(Subtarget),
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 75abd5eb3fca..d96f86dcaeb0 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
   InstrItineraryData  InstrItins;
 public:
   SPUTargetMachine(const Target &T, const std::string &TT,
-                   const std::string &FS);
+                   const std::string &CPU, const std::string &FS);
 
   /// Return the subtarget implementation object
   virtual const SPUSubtarget     *getSubtargetImpl() const {
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 797cfd597e60..10d18f61c7e2 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -22,7 +22,9 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/TypeSymbolTable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -32,7 +34,7 @@
 #include "llvm/Config/config.h"
 #include <algorithm>
 #include <set>
-
+#include <map>
 using namespace llvm;
 
 static cl::opt<std::string>
@@ -75,6 +77,16 @@ extern "C" void LLVMInitializeCppBackendTarget() {
   RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
 }
 
+extern "C" void LLVMInitializeCppBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCppBackendMCInstrInfo() {
+  RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget);
+}
+
+extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() {
+  RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget);
+}
+
 namespace {
   typedef std::vector<const Type*> TypeList;
   typedef std::map<const Type*,std::string> TypeMap;
@@ -92,8 +104,6 @@ namespace {
     uint64_t uniqueNum;
     TypeMap TypeNames;
     ValueMap ValueNames;
-    TypeMap UnresolvedTypes;
-    TypeList TypeStack;
     NameSet UsedNames;
     TypeSet DefinedTypes;
     ValueSet DefinedValues;
@@ -140,8 +150,7 @@ namespace {
     inline void printCppName(const Value* val);
 
     void printAttributes(const AttrListPtr &PAL, const std::string &name);
-    bool printTypeInternal(const Type* Ty);
-    inline void printType(const Type* Ty);
+    void printType(const Type* Ty);
     void printTypes(const Module* M);
 
     void printConstant(const Constant *CPV);
@@ -188,26 +197,11 @@ static std::string getTypePrefix(const Type *Ty) {
   case Type::ArrayTyID:    return "array_";
   case Type::PointerTyID:  return "ptr_";
   case Type::VectorTyID:   return "packed_";
-  case Type::OpaqueTyID:   return "opaque_";
   default:                 return "other_";
   }
   return "unknown_";
 }
 
-// Looks up the type in the symbol table and returns a pointer to its name or
-// a null pointer if it wasn't found. Note that this isn't the same as the
-// Mode::getTypeName function which will return an empty string, not a null
-// pointer if the name is not found.
-static const std::string *
-findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
-  TypeSymbolTable::const_iterator TI = ST.begin();
-  TypeSymbolTable::const_iterator TE = ST.end();
-  for (;TI != TE; ++TI)
-    if (TI->second == Ty)
-      return &(TI->first);
-  return 0;
-}
-
 void CppWriter::error(const std::string& msg) {
   report_fatal_error(msg);
 }
@@ -379,18 +373,20 @@ std::string CppWriter::getCppName(const Type* Ty) {
   case Type::StructTyID:      prefix = "StructTy_"; break;
   case Type::ArrayTyID:       prefix = "ArrayTy_"; break;
   case Type::PointerTyID:     prefix = "PointerTy_"; break;
-  case Type::OpaqueTyID:      prefix = "OpaqueTy_"; break;
   case Type::VectorTyID:      prefix = "VectorTy_"; break;
   default:                    prefix = "OtherTy_"; break; // prevent breakage
   }
 
   // See if the type has a name in the symboltable and build accordingly
-  const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
   std::string name;
-  if (tName)
-    name = std::string(prefix) + *tName;
-  else
-    name = std::string(prefix) + utostr(uniqueNum++);
+  if (const StructType *STy = dyn_cast<StructType>(Ty))
+    if (STy->hasName())
+      name = STy->getName();
+  
+  if (name.empty())
+    name = utostr(uniqueNum++);
+  
+  name = std::string(prefix) + name;
   sanitize(name);
 
   // Save the name
@@ -503,65 +499,38 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
   }
 }
 
-bool CppWriter::printTypeInternal(const Type* Ty) {
+void CppWriter::printType(const Type* Ty) {
   // We don't print definitions for primitive types
   if (Ty->isPrimitiveType() || Ty->isIntegerTy())
-    return false;
+    return;
 
   // If we already defined this type, we don't need to define it again.
   if (DefinedTypes.find(Ty) != DefinedTypes.end())
-    return false;
+    return;
 
   // Everything below needs the name for the type so get it now.
   std::string typeName(getCppName(Ty));
 
-  // Search the type stack for recursion. If we find it, then generate this
-  // as an OpaqueType, but make sure not to do this multiple times because
-  // the type could appear in multiple places on the stack. Once the opaque
-  // definition is issued, it must not be re-issued. Consequently we have to
-  // check the UnresolvedTypes list as well.
-  TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
-                                          Ty);
-  if (TI != TypeStack.end()) {
-    TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
-    if (I == UnresolvedTypes.end()) {
-      Out << "PATypeHolder " << typeName;
-      Out << "_fwd = OpaqueType::get(mod->getContext());";
-      nl(Out);
-      UnresolvedTypes[Ty] = typeName;
-    }
-    return true;
-  }
-
-  // We're going to print a derived type which, by definition, contains other
-  // types. So, push this one we're printing onto the type stack to assist with
-  // recursive definitions.
-  TypeStack.push_back(Ty);
-
   // Print the type definition
   switch (Ty->getTypeID()) {
   case Type::FunctionTyID:  {
     const FunctionType* FT = cast<FunctionType>(Ty);
-    Out << "std::vector<const Type*>" << typeName << "_args;";
+    Out << "std::vector<Type*>" << typeName << "_args;";
     nl(Out);
     FunctionType::param_iterator PI = FT->param_begin();
     FunctionType::param_iterator PE = FT->param_end();
     for (; PI != PE; ++PI) {
       const Type* argTy = static_cast<const Type*>(*PI);
-      bool isForward = printTypeInternal(argTy);
+      printType(argTy);
       std::string argName(getCppName(argTy));
       Out << typeName << "_args.push_back(" << argName;
-      if (isForward)
-        Out << "_fwd";
       Out << ");";
       nl(Out);
     }
-    bool isForward = printTypeInternal(FT->getReturnType());
+    printType(FT->getReturnType());
     std::string retTypeName(getCppName(FT->getReturnType()));
     Out << "FunctionType* " << typeName << " = FunctionType::get(";
     in(); nl(Out) << "/*Result=*/" << retTypeName;
-    if (isForward)
-      Out << "_fwd";
     Out << ",";
     nl(Out) << "/*Params=*/" << typeName << "_args,";
     nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
@@ -571,23 +540,37 @@ bool CppWriter::printTypeInternal(const Type* Ty) {
   }
   case Type::StructTyID: {
     const StructType* ST = cast<StructType>(Ty);
-    Out << "std::vector<const Type*>" << typeName << "_fields;";
+    if (!ST->isAnonymous()) {
+      Out << "StructType *" << typeName << " = ";
+      Out << "StructType::createNamed(mod->getContext(), \"";
+      printEscapedString(ST->getName());
+      Out << "\");";
+      nl(Out);
+      // Indicate that this type is now defined.
+      DefinedTypes.insert(Ty);
+    }
+
+    Out << "std::vector<Type*>" << typeName << "_fields;";
     nl(Out);
     StructType::element_iterator EI = ST->element_begin();
     StructType::element_iterator EE = ST->element_end();
     for (; EI != EE; ++EI) {
       const Type* fieldTy = static_cast<const Type*>(*EI);
-      bool isForward = printTypeInternal(fieldTy);
+      printType(fieldTy);
       std::string fieldName(getCppName(fieldTy));
       Out << typeName << "_fields.push_back(" << fieldName;
-      if (isForward)
-        Out << "_fwd";
       Out << ");";
       nl(Out);
     }
-    Out << "StructType* " << typeName << " = StructType::get("
-        << "mod->getContext(), "
-        << typeName << "_fields, /*isPacked=*/"
+
+    if (ST->isAnonymous()) {
+      Out << "StructType *" << typeName << " = ";
+      Out << "StructType::get(" << "mod->getContext(), ";
+    } else {
+      Out << typeName << "->setBody(";
+    }
+
+    Out << typeName << "_fields, /*isPacked=*/"
         << (ST->isPacked() ? "true" : "false") << ");";
     nl(Out);
     break;
@@ -595,122 +578,55 @@ bool CppWriter::printTypeInternal(const Type* Ty) {
   case Type::ArrayTyID: {
     const ArrayType* AT = cast<ArrayType>(Ty);
     const Type* ET = AT->getElementType();
-    bool isForward = printTypeInternal(ET);
-    std::string elemName(getCppName(ET));
-    Out << "ArrayType* " << typeName << " = ArrayType::get("
-        << elemName << (isForward ? "_fwd" : "")
-        << ", " << utostr(AT->getNumElements()) << ");";
-    nl(Out);
+    printType(ET);
+    if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+      std::string elemName(getCppName(ET));
+      Out << "ArrayType* " << typeName << " = ArrayType::get("
+          << elemName
+          << ", " << utostr(AT->getNumElements()) << ");";
+      nl(Out);
+    }
     break;
   }
   case Type::PointerTyID: {
     const PointerType* PT = cast<PointerType>(Ty);
     const Type* ET = PT->getElementType();
-    bool isForward = printTypeInternal(ET);
-    std::string elemName(getCppName(ET));
-    Out << "PointerType* " << typeName << " = PointerType::get("
-        << elemName << (isForward ? "_fwd" : "")
-        << ", " << utostr(PT->getAddressSpace()) << ");";
-    nl(Out);
+    printType(ET);
+    if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+      std::string elemName(getCppName(ET));
+      Out << "PointerType* " << typeName << " = PointerType::get("
+          << elemName
+          << ", " << utostr(PT->getAddressSpace()) << ");";
+      nl(Out);
+    }
     break;
   }
   case Type::VectorTyID: {
     const VectorType* PT = cast<VectorType>(Ty);
     const Type* ET = PT->getElementType();
-    bool isForward = printTypeInternal(ET);
-    std::string elemName(getCppName(ET));
-    Out << "VectorType* " << typeName << " = VectorType::get("
-        << elemName << (isForward ? "_fwd" : "")
-        << ", " << utostr(PT->getNumElements()) << ");";
-    nl(Out);
-    break;
-  }
-  case Type::OpaqueTyID: {
-    Out << "OpaqueType* " << typeName;
-    Out << " = OpaqueType::get(mod->getContext());";
-    nl(Out);
+    printType(ET);
+    if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+      std::string elemName(getCppName(ET));
+      Out << "VectorType* " << typeName << " = VectorType::get("
+          << elemName
+          << ", " << utostr(PT->getNumElements()) << ");";
+      nl(Out);
+    }
     break;
   }
   default:
     error("Invalid TypeID");
   }
 
-  // If the type had a name, make sure we recreate it.
-  const std::string* progTypeName =
-    findTypeName(TheModule->getTypeSymbolTable(),Ty);
-  if (progTypeName) {
-    Out << "mod->addTypeName(\"" << *progTypeName << "\", "
-        << typeName << ");";
-    nl(Out);
-  }
-
-  // Pop us off the type stack
-  TypeStack.pop_back();
-
   // Indicate that this type is now defined.
   DefinedTypes.insert(Ty);
 
-  // Early resolve as many unresolved types as possible. Search the unresolved
-  // types map for the type we just printed. Now that its definition is complete
-  // we can resolve any previous references to it. This prevents a cascade of
-  // unresolved types.
-  TypeMap::iterator I = UnresolvedTypes.find(Ty);
-  if (I != UnresolvedTypes.end()) {
-    Out << "cast<OpaqueType>(" << I->second
-        << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
-    nl(Out);
-    Out << I->second << " = cast<";
-    switch (Ty->getTypeID()) {
-    case Type::FunctionTyID: Out << "FunctionType"; break;
-    case Type::ArrayTyID:    Out << "ArrayType"; break;
-    case Type::StructTyID:   Out << "StructType"; break;
-    case Type::VectorTyID:   Out << "VectorType"; break;
-    case Type::PointerTyID:  Out << "PointerType"; break;
-    case Type::OpaqueTyID:   Out << "OpaqueType"; break;
-    default:                 Out << "NoSuchDerivedType"; break;
-    }
-    Out << ">(" << I->second << "_fwd.get());";
-    nl(Out); nl(Out);
-    UnresolvedTypes.erase(I);
-  }
-
   // Finally, separate the type definition from other with a newline.
   nl(Out);
-
-  // We weren't a recursive type
-  return false;
-}
-
-// Prints a type definition. Returns true if it could not resolve all the
-// types in the definition but had to use a forward reference.
-void CppWriter::printType(const Type* Ty) {
-  assert(TypeStack.empty());
-  TypeStack.clear();
-  printTypeInternal(Ty);
-  assert(TypeStack.empty());
 }
 
 void CppWriter::printTypes(const Module* M) {
-  // Walk the symbol table and print out all its types
-  const TypeSymbolTable& symtab = M->getTypeSymbolTable();
-  for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
-       TI != TE; ++TI) {
-
-    // For primitive types and types already defined, just add a name
-    TypeMap::const_iterator TNI = TypeNames.find(TI->second);
-    if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
-        TNI != TypeNames.end()) {
-      Out << "mod->addTypeName(\"";
-      printEscapedString(TI->first);
-      Out << "\", " << getCppName(TI->second) << ");";
-      nl(Out);
-      // For everything else, define the type
-    } else {
-      printType(TI->second);
-    }
-  }
-
-  // Add all of the global variables to the value table...
+  // Add all of the global variables to the value table.
   for (Module::const_global_iterator I = TheModule->global_begin(),
          E = TheModule->global_end(); I != E; ++I) {
     if (I->hasInitializer())
@@ -989,12 +905,12 @@ void CppWriter::printVariableUses(const GlobalVariable *GV) {
   nl(Out);
   printType(GV->getType());
   if (GV->hasInitializer()) {
-    Constant *Init = GV->getInitializer();
+    const Constant *Init = GV->getInitializer();
     printType(Init->getType());
-    if (Function *F = dyn_cast<Function>(Init)) {
+    if (const Function *F = dyn_cast<Function>(Init)) {
       nl(Out)<< "/ Function Declarations"; nl(Out);
       printFunctionHead(F);
-    } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+    } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
       nl(Out) << "// Global Variable Declarations"; nl(Out);
       printVariableHead(gv);
       
@@ -1353,9 +1269,10 @@ void CppWriter::printInstruction(const Instruction *I,
     printEscapedString(phi->getName());
     Out << "\", " << bbname << ");";
     nl(Out);
-    for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+    for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
       Out << iName << "->addIncoming("
-          << opNames[i] << ", " << opNames[i+1] << ");";
+          << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+          << getOpName(phi->getIncomingBlock(i)) << ");";
       nl(Out);
     }
     break;
@@ -1954,8 +1871,8 @@ void CppWriter::printVariable(const std::string& fname,
   Out << "}\n";
 }
 
-void CppWriter::printType(const std::string& fname,
-                          const std::string& typeName) {
+void CppWriter::printType(const std::string &fname,
+                          const std::string &typeName) {
   const Type* Ty = TheModule->getTypeByName(typeName);
   if (!Ty) {
     error(std::string("Type '") + typeName + "' not found in input module");
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index e42166e05584..7322e3e34f00 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,8 +23,8 @@ class formatted_raw_ostream;
 
 struct CPPTargetMachine : public TargetMachine {
   CPPTargetMachine(const Target &T, const std::string &TT,
-                   const std::string &FS)
-    : TargetMachine(T) {}
+                   const std::string &CPU, const std::string &FS)
+    : TargetMachine(T, TT, CPU, FS) {}
 
   virtual bool addPassesToEmitFile(PassManagerBase &PM,
                                    formatted_raw_ostream &Out,
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 190379657f42..15965964452a 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -86,8 +86,9 @@ namespace {
       : MBlazeBaseAsmLexer(T, MAI) {
       std::string tripleString("mblaze-unknown-unknown");
       std::string featureString;
+      std::string CPU;
       OwningPtr<const TargetMachine> 
-        targetMachine(T.createTargetMachine(tripleString, featureString));
+        targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
       InitRegisterMap(targetMachine->getRegisterInfo());
     }
   };
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 524f33d19335..eebd9d878943 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -32,7 +32,6 @@ struct MBlazeOperand;
 
 class MBlazeAsmParser : public TargetAsmParser {
   MCAsmParser &Parser;
-  TargetMachine &TM;
 
   MCAsmParser &getParser() const { return Parser; }
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -64,8 +63,8 @@ class MBlazeAsmParser : public TargetAsmParser {
 
 
 public:
-  MBlazeAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
-    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+  MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+    : TargetAsmParser(), Parser(_Parser) {}
 
   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -220,7 +219,7 @@ public:
     return StringRef(Tok.Data, Tok.Length);
   }
 
-  virtual void dump(raw_ostream &OS) const;
+  virtual void print(raw_ostream &OS) const;
 
   static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
     MBlazeOperand *Op = new MBlazeOperand(Token);
@@ -280,7 +279,7 @@ public:
 
 } // end anonymous namespace.
 
-void MBlazeOperand::dump(raw_ostream &OS) const {
+void MBlazeOperand::print(raw_ostream &OS) const {
   switch (Kind) {
   case Immediate:
     getImm()->print(OS);
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 004057ad4ae3..0bc5b7820378 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -1,16 +1,13 @@
 set(LLVM_TARGET_DEFINITIONS MBlaze.td)
 
-tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(MBlazeGenRegisterNames.inc -gen-register-enums)
-tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc)
-tablegen(MBlazeGenInstrNames.inc -gen-instr-enums)
-tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc)
+tablegen(MBlazeGenRegisterInfo.inc -gen-register-info)
+tablegen(MBlazeGenInstrInfo.inc -gen-instr-info)
 tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
 tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
 tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
 tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
-tablegen(MBlazeGenSubtarget.inc -gen-subtarget)
+tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget)
 tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
 tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
 
@@ -20,7 +17,6 @@ add_llvm_target(MBlazeCodeGen
   MBlazeISelDAGToDAG.cpp
   MBlazeISelLowering.cpp
   MBlazeFrameLowering.cpp
-  MBlazeMCAsmInfo.cpp
   MBlazeRegisterInfo.cpp
   MBlazeSubtarget.cpp
   MBlazeTargetMachine.cpp
@@ -38,3 +34,4 @@ add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 060a87b7c616..88d80a12eb3a 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -27,9 +27,12 @@
 
 // #include "MBlazeGenDecoderTables.inc"
 // #include "MBlazeGenRegisterNames.inc"
-#include "MBlazeGenInstrInfo.inc"
 #include "MBlazeGenEDInfo.inc"
 
+namespace llvm {
+extern MCInstrDesc MBlazeInsts[];
+}
+
 using namespace llvm;
 
 const unsigned UNSUPPORTED = -1;
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 13c4b49f981c..eacca410b986 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -18,11 +18,10 @@
 
 namespace llvm {
   class MCOperand;
-  class TargetMachine;
 
   class MBlazeInstPrinter : public MCInstPrinter {
   public:
-    MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+    MBlazeInstPrinter(const MCAsmInfo &MAI)
       : MCInstPrinter(MAI) {}
 
     virtual void printInst(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
index 00c73f06fe10..3390794c9375 100644
--- a/lib/Target/MBlaze/MBlaze.h
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_MBLAZE_H
 #define TARGET_MBLAZE_H
 
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -22,26 +23,20 @@ namespace llvm {
   class FunctionPass;
   class MachineCodeEmitter;
   class MCCodeEmitter;
+  class MCInstrInfo;
+  class MCSubtargetInfo;
   class TargetAsmBackend;
   class formatted_raw_ostream;
 
-  MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &,
-                                           TargetMachine &TM,
+  MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+                                           const MCSubtargetInfo &STI,
                                            MCContext &Ctx);
-
+  
   TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
 
   FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
   FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
 
-  extern Target TheMBlazeTarget;
 } // end namespace llvm;
 
-// Defines symbolic names for MBlaze registers.  This defines a mapping from
-// register name to register number.
-#include "MBlazeGenRegisterNames.inc"
-
-// Defines symbolic names for the MBlaze instructions.
-#include "MBlazeGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0f0f60e69f08..0016df569b93 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -319,11 +319,10 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
 }
 
 static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
-                                                TargetMachine &TM,
                                                 unsigned SyntaxVariant,
                                                 const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new MBlazeInstPrinter(TM, MAI);
+    return new MBlazeInstPrinter(MAI);
   return 0;
 }
 
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 973e96844e81..c07570a487b9 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -109,7 +109,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
   // Hazard check
   MachineBasicBlock::iterator a = candidate;
   MachineBasicBlock::iterator b = slot;
-  TargetInstrDesc desc = candidate->getDesc();
+  MCInstrDesc desc = candidate->getDesc();
 
   // MBB layout:-
   //    candidate := a0 = operation(a1, a2)
@@ -183,7 +183,7 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
   if (candidate == MBB.begin())
     return false;
 
-  TargetInstrDesc brdesc = (--candidate)->getDesc();
+  MCInstrDesc brdesc = (--candidate)->getDesc();
   return (brdesc.hasDelaySlot());
 }
 
@@ -211,7 +211,7 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
       break;
 
     --I;
-    TargetInstrDesc desc = I->getDesc();
+    MCInstrDesc desc = I->getDesc();
     if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
         desc.isCall() || desc.isReturn() || desc.isBarrier() ||
         hasUnknownSideEffects(I))
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index c5e0a8960ed8..62dfdcc2fd10 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -69,6 +69,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
 
   // Floating point operations which are not supported
   setOperationAction(ISD::FREM,       MVT::f32, Expand);
+  setOperationAction(ISD::FMA,        MVT::f32, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
@@ -1114,15 +1115,19 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight(
   return weight;
 }
 
-/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
-/// return a list of registers that can be used to satisfy the constraint.
-/// This should only be used for C_RegisterClass constraints.
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
 std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
 getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     case 'r':
       return std::make_pair(0U, MBlaze::GPRRegisterClass);
+      // TODO: These can't possibly be right, but match what was in
+      // getRegClassForInlineAsmConstraint.
+    case 'd':
+    case 'y':
     case 'f':
       if (VT == MVT::f32)
         return std::make_pair(0U, MBlaze::GPRRegisterClass);
@@ -1131,32 +1136,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-/// Given a register class constraint, like 'r', if this corresponds directly
-/// to an LLVM register class, return a register of 0 and the register class
-/// pointer.
-std::vector<unsigned> MBlazeTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
-  if (Constraint.size() != 1)
-    return std::vector<unsigned>();
-
-  switch (Constraint[0]) {
-    default : break;
-    case 'r':
-    // GCC MBlaze Constraint Letters
-    case 'd':
-    case 'y':
-    case 'f':
-      return make_vector<unsigned>(
-        MBlaze::R3,  MBlaze::R4,  MBlaze::R5,  MBlaze::R6,
-        MBlaze::R7,  MBlaze::R9,  MBlaze::R10, MBlaze::R11,
-        MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21,
-        MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
-        MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
-        MBlaze::R30, MBlaze::R31, 0);
-  }
-  return std::vector<unsigned>();
-}
-
 bool MBlazeTargetLowering::
 isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The MBlaze target isn't yet aware of offsets.
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 265c1a709bc8..bb128da3c7c0 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -173,10 +173,6 @@ namespace llvm {
               getRegForInlineAsmConstraint(const std::string &Constraint,
               EVT VT) const;
 
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-              EVT VT) const;
-
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
     /// isFPImmLegal - Returns true if the target can instruction select the
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 794ebedf1e6a..188f10a3972e 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -14,18 +14,21 @@
 #include "MBlazeInstrInfo.h"
 #include "MBlazeTargetMachine.h"
 #include "MBlazeMachineFunction.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
 #include "MBlazeGenInstrInfo.inc"
 
 using namespace llvm;
 
 MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
-  : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)),
+  : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
     TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
 
 static bool isZeroImm(const MachineOperand &op) {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index b717da8e2bec..79f962b349bf 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -19,6 +19,9 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "MBlazeRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "MBlazeGenInstrInfo.inc"
+
 namespace llvm {
 
 namespace MBlaze {
@@ -219,7 +222,7 @@ namespace MBlazeII {
   };
 }
 
-class MBlazeInstrInfo : public TargetInstrInfoImpl {
+class MBlazeInstrInfo : public MBlazeGenInstrInfo {
   MBlazeTargetMachine &TM;
   const MBlazeRegisterInfo RI;
 public:
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 7e4a2f5c945e..32d67b264a20 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -92,7 +92,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
 
 static const FunctionType *getType(LLVMContext &Context, unsigned id) {
   const Type *ResultTy = NULL;
-  std::vector<const Type*> ArgTys;
+  std::vector<Type*> ArgTys;
   bool IsVarArg = false;
 
 #define GET_INTRINSIC_GENERATOR
diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
index 3ece1a8a340d..ddc636d0ce64 100644
--- a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
@@ -29,13 +29,12 @@ namespace {
 class MBlazeMCCodeEmitter : public MCCodeEmitter {
   MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
   void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
-  const TargetMachine &TM;
-  const TargetInstrInfo &TII;
-  MCContext &Ctx;
+  const MCInstrInfo &MCII;
 
 public:
-  MBlazeMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
-    : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
+  MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+                      MCContext &ctx)
+    : MCII(mcii) {
   }
 
   ~MBlazeMCCodeEmitter() {}
@@ -96,10 +95,10 @@ public:
 } // end anonymous namespace
 
 
-MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const Target &,
-                                               TargetMachine &TM,
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+                                               const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
-  return new MBlazeMCCodeEmitter(TM, Ctx);
+  return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
 }
 
 /// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -179,7 +178,7 @@ void MBlazeMCCodeEmitter::
 EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                   SmallVectorImpl<MCFixup> &Fixups) const {
   unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = TII.get(Opcode);
+  const MCInstrDesc &Desc = MCII.get(Opcode);
   uint64_t TSFlags = Desc.TSFlags;
   // Keep track of the current byte being emitted.
   unsigned CurByte = 0;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 517279fda51e..f0b201a66170 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -37,12 +37,14 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 
+#define GET_REGINFO_TARGET_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
 using namespace llvm;
 
 MBlazeRegisterInfo::
 MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
-  : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
-    Subtarget(ST), TII(tii) {}
+  : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {}
 
 /// getRegisterNumbering - Given the enum value for some register, e.g.
 /// MBlaze::R0, return the number that it corresponds to (e.g. 0).
@@ -359,6 +361,3 @@ int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
 int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
   return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
 }
-
-#include "MBlazeGenRegisterInfo.inc"
-
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 380783991ce1..7ebce21d3a80 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -17,7 +17,9 @@
 
 #include "MBlaze.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "MBlazeGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "MBlazeGenRegisterInfo.inc"
 
 namespace llvm {
 class MBlazeSubtarget;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index bd396ed47b36..13c46ba1ecba 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -109,32 +109,9 @@ let Namespace = "MBlaze" in {
 // Register Classes
 //===----------------------------------------------------------------------===//
 
-def GPR : RegisterClass<"MBlaze", [i32,f32], 32,
-  [
-  // Return Values and Arguments
-  R3, R4, R5, R6, R7, R8, R9, R10,
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>;
 
-  // Not preserved across procedure calls
-  R11, R12,
-
-  // Callee save
-  R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-
-  // Reserved
-  R0,  // Always zero
-  R1,  // The stack pointer
-  R2,  // Read-only small data area anchor
-  R13, // Read-write small data area anchor
-  R14, // Return address for interrupts
-  R15, // Return address for sub-routines
-  R16, // Return address for trap
-  R17, // Return address for exceptions
-  R18, // Reserved for assembler
-  R19  // The frame-pointer
-  ]>;
-
-def SPR : RegisterClass<"MBlaze", [i32], 32,
-  [
+def SPR : RegisterClass<"MBlaze", [i32], 32, (add
   // Reserved
   RPC,
   RMSR,
@@ -160,12 +137,12 @@ def SPR : RegisterClass<"MBlaze", [i32], 32,
   RPVR9,
   RPVR10,
   RPVR11
-  ]>
+  )>
 {
   // None of the special purpose registers are allocatable.
   let isAllocatable = 0;
 }
 
-def CRC : RegisterClass<"MBlaze", [i32], 32, [CARRY]> {
+def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> {
   let CopyCost = -1;
 }
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index a80744a4769a..eda141daf2b3 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -7,29 +7,42 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the MBlaze specific subclass of TargetSubtarget.
+// This file implements the MBlaze specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "MBlazeSubtarget.h"
 #include "MBlaze.h"
 #include "MBlazeRegisterInfo.h"
-#include "MBlazeGenSubtarget.inc"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MBlazeGenSubtargetInfo.inc"
+
 using namespace llvm;
 
-MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS):
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
+                                 const std::string &CPU,
+                                 const std::string &FS):
+  MBlazeGenSubtargetInfo(TT, CPU, FS),
   HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
   HasFPU(false), HasMul64(false), HasSqrt(false)
 {
   // Parse features string.
-  std::string CPU = "mblaze";
-  CPU = ParseSubtargetFeatures(FS, CPU);
+  std::string CPUName = CPU;
+  if (CPUName.empty())
+    CPUName = "mblaze";
+  ParseSubtargetFeatures(CPUName, FS);
 
   // Only use instruction scheduling if the selected CPU has an instruction
   // itinerary (the default CPU is the only one that doesn't).
-  HasItin = CPU != "mblaze";
-  DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n");
+  HasItin = CPUName != "mblaze";
+  DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n");
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUName);
 
   // Compute the issue width of the MBlaze itineraries
   computeIssueWidth();
@@ -41,11 +54,10 @@ void MBlazeSubtarget::computeIssueWidth() {
 
 bool MBlazeSubtarget::
 enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                      TargetSubtarget::AntiDepBreakMode& Mode,
+                      TargetSubtargetInfo::AntiDepBreakMode& Mode,
                       RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
   CriticalPathRCs.clear();
   CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
   return HasItin && OptLevel >= CodeGenOpt::Default;
 }
-
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index 2255b2809be2..43b0197ad5aa 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -7,21 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the MBlaze specific subclass of TargetSubtarget.
+// This file declares the MBlaze specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef MBLAZESUBTARGET_H
 #define MBLAZESUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "MBlazeGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-class MBlazeSubtarget : public TargetSubtarget {
+class MBlazeSubtarget : public MBlazeGenSubtargetInfo {
 
 protected:
   bool HasBarrel;
@@ -39,12 +42,12 @@ public:
 
   /// This constructor initializes the data members to match that
   /// of the specified triple.
-  MBlazeSubtarget(const std::string &TT, const std::string &FS);
+  MBlazeSubtarget(const std::string &TT, const std::string &CPU,
+                  const std::string &FS);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   /// Compute the number of maximum number of issues per cycle for the
   /// MBlaze scheduling itineraries.
@@ -52,7 +55,7 @@ public:
 
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtarget::AntiDepBreakMode& Mode,
+                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
                              RegClassVector& CriticalPathRCs) const;
 
   /// getInstrItins - Return the instruction itineraies based on subtarget.
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index df34a83e33a8..7208874aef1d 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MBlaze.h"
-#include "MBlazeMCAsmInfo.h"
 #include "MBlazeTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
@@ -21,14 +20,6 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
-  Triple TheTriple(TT);
-  switch (TheTriple.getOS()) {
-  default:
-    return new MBlazeMCAsmInfo();
-  }
-}
-
 static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCContext &Ctx, TargetAsmBackend &TAB,
                                     raw_ostream &_OS,
@@ -55,9 +46,6 @@ extern "C" void LLVMInitializeMBlazeTarget() {
   // Register the target.
   RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
 
-  // Register the target asm info.
-  RegisterAsmInfoFn A(TheMBlazeTarget, createMCAsmInfo);
-
   // Register the MC code emitter
   TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
                                       llvm::createMBlazeMCCodeEmitter);
@@ -80,9 +68,9 @@ extern "C" void LLVMInitializeMBlazeTarget() {
 // an easier handling.
 MBlazeTargetMachine::
 MBlazeTargetMachine(const Target &T, const std::string &TT,
-                    const std::string &FS):
-  LLVMTargetMachine(T, TT),
-  Subtarget(TT, FS),
+                    const std::string &CPU, const std::string &FS):
+  LLVMTargetMachine(T, TT, CPU, FS),
+  Subtarget(TT, CPU, FS),
   DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
   InstrInfo(*this),
   FrameLowering(Subtarget),
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 48ce37a482fc..cd6caafbf309 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -42,7 +42,7 @@ namespace llvm {
 
   public:
     MBlazeTargetMachine(const Target &T, const std::string &TT,
-                        const std::string &FS);
+                        const std::string &CPU, const std::string &FS);
 
     virtual const MBlazeInstrInfo *getInstrInfo() const
     { return &InstrInfo; }
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..3d15708c35b8
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMBlazeDesc
+  MBlazeMCTargetDesc.cpp
+  MBlazeMCAsmInfo.cpp
+  )
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
index 1467141d34ae..0d88466bb300 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -15,6 +15,8 @@
 using namespace llvm;
 
 MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+  IsLittleEndian              = false;
+  StackGrowsUp                = false;
   SupportsDebugInformation    = true;
   AlignmentIsInBytes          = false;
   PrivateGlobalPrefix         = "$";
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
index e68dd58b016b..e68dd58b016b 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
new file mode 100644
index 000000000000..20d6c0bd2156
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -0,0 +1,65 @@
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCTargetDesc.h"
+#include "MBlazeMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MBlazeGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMBlazeMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitMBlazeMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+}
+
+static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                    StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitMBlazeMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+                                          createMBlazeMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  default:
+    return new MBlazeMCAsmInfo();
+  }
+}
+
+extern "C" void LLVMInitializeMBlazeMCAsmInfo() {
+  RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+}
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
new file mode 100644
index 000000000000..b14772ef060b
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEMCTARGETDESC_H
+#define MBLAZEMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMBlazeTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for MBlaze registers.  This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MBlazeGenRegisterInfo.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#define GET_INSTRINFO_ENUM
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MBlazeGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/MBlaze/MCTargetDesc/Makefile b/lib/Target/MBlaze/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..71075ffbf47c
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/MCTargetDesc/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
index e01c60bb8c65..83c2a7d34da1 100644
--- a/lib/Target/MBlaze/Makefile
+++ b/lib/Target/MBlaze/Makefile
@@ -11,15 +11,14 @@ LIBRARYNAME = LLVMMBlazeCodeGen
 TARGET = MBlaze
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
-								MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
-								MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
-								MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
-								MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
-								MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \
-								MBlazeGenEDInfo.inc
+BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \
+		MBlazeGenAsmWriter.inc \
+                MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
+                MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
+                MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \
+                MBlazeGenEDInfo.inc
 
-DIRS = InstPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 2c7cbb64418f..33f3d449ed99 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -1,14 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS MSP430.td)
 
-tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(MSP430GenRegisterNames.inc -gen-register-enums)
-tablegen(MSP430GenRegisterInfo.inc -gen-register-desc)
-tablegen(MSP430GenInstrNames.inc -gen-instr-enums)
-tablegen(MSP430GenInstrInfo.inc -gen-instr-desc)
+tablegen(MSP430GenRegisterInfo.inc -gen-register-info)
+tablegen(MSP430GenInstrInfo.inc -gen-instr-info)
 tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
 tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
 tablegen(MSP430GenCallingConv.inc -gen-callingconv)
-tablegen(MSP430GenSubtarget.inc -gen-subtarget)
+tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(MSP430CodeGen
   MSP430BranchSelector.cpp
@@ -16,7 +13,6 @@ add_llvm_target(MSP430CodeGen
   MSP430ISelLowering.cpp
   MSP430InstrInfo.cpp
   MSP430FrameLowering.cpp
-  MSP430MCAsmInfo.cpp
   MSP430RegisterInfo.cpp
   MSP430Subtarget.cpp
   MSP430TargetMachine.cpp
@@ -27,3 +23,4 @@ add_llvm_target(MSP430CodeGen
 
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 63860dcc7e3a..50d98b7c41fd 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -18,11 +18,10 @@
 
 namespace llvm {
   class MCOperand;
-  class TargetMachine;
 
   class MSP430InstPrinter : public MCInstPrinter {
   public:
-    MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+    MSP430InstPrinter(const MCAsmInfo &MAI)
       : MCInstPrinter(MAI) {}
 
     virtual void printInst(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..0f3ebd303924
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMSP430Desc
+  MSP430MCTargetDesc.cpp
+  MSP430MCAsmInfo.cpp
+  )
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 3f4494460554..ad7d380b5631 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -15,6 +15,8 @@
 using namespace llvm;
 
 MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
+  PointerSize = 2;
+
   PrivateGlobalPrefix = ".L";
   WeakRefDirective ="\t.weak\t";
   PCSymbol=".";
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index f3138a22022d..f3138a22022d 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
new file mode 100644
index 000000000000..43a704d7a7df
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "MSP430MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MSP430GenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createMSP430MCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitMSP430MCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                    StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+                                          createMSP430MCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMSP430MCAsmInfo() {
+  RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
new file mode 100644
index 000000000000..0d8a6bdb44f9
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMSP430Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#define GET_INSTRINFO_ENUM
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MSP430GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/MSP430/MCTargetDesc/Makefile b/lib/Target/MSP430/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..bb857998eef9
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MSP430/MCTargetDesc/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index e74211807c0d..4574ce5f98b7 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TARGET_MSP430_H
 #define LLVM_TARGET_MSP430_H
 
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace MSP430CC {
@@ -41,15 +42,6 @@ namespace llvm {
 
   FunctionPass *createMSP430BranchSelectionPass();
 
-  extern Target TheMSP430Target;
-
 } // end namespace llvm;
 
-// Defines symbolic names for MSP430 registers.
-// This defines a mapping from register name to register number.
-#include "MSP430GenRegisterNames.inc"
-
-// Defines symbolic names for the MSP430 instructions.
-#include "MSP430GenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 5264d680d8b3..2042056617ac 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -15,7 +15,6 @@
 #define DEBUG_TYPE "asm-printer"
 #include "MSP430.h"
 #include "MSP430InstrInfo.h"
-#include "MSP430MCAsmInfo.h"
 #include "MSP430MCInstLower.h"
 #include "MSP430TargetMachine.h"
 #include "InstPrinter/MSP430InstPrinter.h"
@@ -28,6 +27,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -164,11 +164,10 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
 }
 
 static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
-                                                TargetMachine &TM,
                                                 unsigned SyntaxVariant,
                                                 const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new MSP430InstPrinter(TM, MAI);
+    return new MSP430InstPrinter(MAI);
   return 0;
 }
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 424df136cc16..846d09361b33 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -15,18 +15,21 @@
 #include "MSP430InstrInfo.h"
 #include "MSP430MachineFunctionInfo.h"
 #include "MSP430TargetMachine.h"
-#include "MSP430GenInstrInfo.inc"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
 
+#define GET_INSTRINFO_CTOR
+#include "MSP430GenInstrInfo.inc"
+
 using namespace llvm;
 
 MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
-  : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)),
+  : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
     RI(tm, *this), TM(tm) {}
 
 void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -158,13 +161,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
 }
 
 bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isTerminator()) return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isTerminator()) return false;
 
   // Conditional branch is a special case.
-  if (TID.isBranch() && !TID.isBarrier())
+  if (MCID.isBranch() && !MCID.isBarrier())
     return true;
-  if (!TID.isPredicable())
+  if (!MCID.isPredicable())
     return true;
   return !isPredicated(MI);
 }
@@ -293,7 +296,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
 /// instruction may be.  This returns the maximum number of bytes.
 ///
 unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
-  const TargetInstrDesc &Desc = MI->getDesc();
+  const MCInstrDesc &Desc = MI->getDesc();
 
   switch (Desc.TSFlags & MSP430II::SizeMask) {
   default:
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index e885cd36a041..90013f5c2e70 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -17,6 +17,9 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "MSP430RegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "MSP430GenInstrInfo.inc"
+
 namespace llvm {
 
 class MSP430TargetMachine;
@@ -37,7 +40,7 @@ namespace MSP430II {
   };
 }
 
-class MSP430InstrInfo : public TargetInstrInfoImpl {
+class MSP430InstrInfo : public MSP430GenInstrInfo {
   const MSP430RegisterInfo RI;
   MSP430TargetMachine &TM;
 public:
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 53f4c2e4a887..1cc60bba3a55 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -26,13 +26,15 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Support/ErrorHandling.h"
 
+#define GET_REGINFO_TARGET_DESC
+#include "MSP430GenRegisterInfo.inc"
+
 using namespace llvm;
 
 // FIXME: Provide proper call frame setup / destroy opcodes.
 MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
                                        const TargetInstrInfo &tii)
-  : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
-    TM(tm), TII(tii) {
+  : MSP430GenRegisterInfo(), TM(tm), TII(tii) {
   StackAlign = TM.getFrameLowering()->getStackAlignment();
 }
 
@@ -117,12 +119,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
 
       MachineInstr *New = 0;
-      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+      if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
         New = BuildMI(MF, Old->getDebugLoc(),
                       TII.get(MSP430::SUB16ri), MSP430::SPW)
           .addReg(MSP430::SPW).addImm(Amount);
       } else {
-        assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+        assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
         // factor out the amount the callee already popped.
         uint64_t CalleeAmt = Old->getOperand(1).getImm();
         Amount -= CalleeAmt;
@@ -140,7 +142,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
         MBB.insert(I, New);
       }
     }
-  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+  } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
     // If we are performing frame pointer elimination and if the callee pops
     // something off the stack pointer, add it back.
     if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
@@ -250,5 +252,3 @@ int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
   llvm_unreachable("Not implemented yet!");
   return 0;
 }
-
-#include "MSP430GenRegisterInfo.inc"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index e82055876f25..fb70594ab37c 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -15,7 +15,9 @@
 #define LLVM_TARGET_MSP430REGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "MSP430GenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "MSP430GenRegisterInfo.inc"
 
 namespace llvm {
 
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 3ef6ab219def..d1c2e3f7915c 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -66,19 +66,19 @@ def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
 
 def GR8 : RegisterClass<"MSP430", [i8], 8,
    // Volatile registers
-  [R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+  (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
    // Frame pointer, sometimes allocable
    FPB,
    // Volatile, but not allocable
-   PCB, SPB, SRB, CGB]>;
+   PCB, SPB, SRB, CGB)>;
 
 def GR16 : RegisterClass<"MSP430", [i16], 16,
    // Volatile registers
-  [R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+  (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
    // Frame pointer, sometimes allocable
    FPW,
    // Volatile, but not allocable
-   PCW, SPW, SRW, CGW]>
+   PCW, SPW, SRW, CGW)>
 {
   let SubRegClasses = [(GR8 subreg_8bit)];
 }
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 1346cb9a04dc..b58c50afb982 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -7,19 +7,26 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the MSP430 specific subclass of TargetSubtarget.
+// This file implements the MSP430 specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "MSP430Subtarget.h"
 #include "MSP430.h"
-#include "MSP430GenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MSP430GenSubtargetInfo.inc"
 
 using namespace llvm;
 
-MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &FS) {
-  std::string CPU = "generic";
+MSP430Subtarget::MSP430Subtarget(const std::string &TT,
+                                 const std::string &CPU,
+                                 const std::string &FS) :
+  MSP430GenSubtargetInfo(TT, CPU, FS) {
+  std::string CPUName = "generic";
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);
 }
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 1070544f0773..1ce5f11fe1bb 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -7,31 +7,35 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the MSP430 specific subclass of TargetSubtarget.
+// This file declares the MSP430 specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_TARGET_MSP430_SUBTARGET_H
 #define LLVM_TARGET_MSP430_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MSP430GenSubtargetInfo.inc"
 
 #include <string>
 
 namespace llvm {
+class StringRef;
 
-class MSP430Subtarget : public TargetSubtarget {
+class MSP430Subtarget : public MSP430GenSubtargetInfo {
   bool ExtendedInsts;
 public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  MSP430Subtarget(const std::string &TT, const std::string &FS);
+  MSP430Subtarget(const std::string &TT, const std::string &CPU,
+                  const std::string &FS);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 };
 } // End llvm namespace
 
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index fba95365a6a4..971f512141e8 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MSP430.h"
-#include "MSP430MCAsmInfo.h"
 #include "MSP430TargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
@@ -23,14 +22,14 @@ using namespace llvm;
 extern "C" void LLVMInitializeMSP430Target() {
   // Register the target.
   RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
-  RegisterAsmInfo<MSP430MCAsmInfo> Z(TheMSP430Target);
 }
 
 MSP430TargetMachine::MSP430TargetMachine(const Target &T,
                                          const std::string &TT,
+                                         const std::string &CPU,
                                          const std::string &FS)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS),
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS),
     // FIXME: Check TargetData string.
     DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
     InstrInfo(*this), TLInfo(*this), TSInfo(*this),
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index cee3b0480596..2a9eea0bcd82 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
 
 public:
   MSP430TargetMachine(const Target &T, const std::string &TT,
-                      const std::string &FS);
+                      const std::string &CPU, const std::string &FS);
 
   virtual const TargetFrameLowering *getFrameLowering() const {
     return &FrameLowering;
diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile
index fa4e80b0ff37..82216edd81e4 100644
--- a/lib/Target/MSP430/Makefile
+++ b/lib/Target/MSP430/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMMSP430CodeGen
 TARGET = MSP430
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
-		MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \
-		MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \
+BUILT_SOURCES = MSP430GenRegisterInfo.inc MSP430GenInstrInfo.inc \
+		MSP430GenAsmWriter.inc \
 		MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
-		MSP430GenSubtarget.inc
+		MSP430GenSubtargetInfo.inc
 
-DIRS = InstPrinter TargetInfo
+DIRS = InstPrinter TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index fd16516f3851..36ab1a97e4f8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -1,14 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS Mips.td)
 
-tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(MipsGenRegisterNames.inc -gen-register-enums)
-tablegen(MipsGenRegisterInfo.inc -gen-register-desc)
-tablegen(MipsGenInstrNames.inc -gen-instr-enums)
-tablegen(MipsGenInstrInfo.inc -gen-instr-desc)
+tablegen(MipsGenRegisterInfo.inc -gen-register-info)
+tablegen(MipsGenInstrInfo.inc -gen-instr-info)
 tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
 tablegen(MipsGenDAGISel.inc -gen-dag-isel)
 tablegen(MipsGenCallingConv.inc -gen-callingconv)
-tablegen(MipsGenSubtarget.inc -gen-subtarget)
+tablegen(MipsGenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(MipsCodeGen
   MipsAsmPrinter.cpp
@@ -19,7 +16,8 @@ add_llvm_target(MipsCodeGen
   MipsISelDAGToDAG.cpp
   MipsISelLowering.cpp
   MipsFrameLowering.cpp
-  MipsMCAsmInfo.cpp
+  MipsMCInstLower.cpp
+  MipsMCSymbolRefExpr.cpp
   MipsRegisterInfo.cpp
   MipsSubtarget.cpp
   MipsTargetMachine.cpp
@@ -27,4 +25,6 @@ add_llvm_target(MipsCodeGen
   MipsSelectionDAGInfo.cpp
   )
 
+add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..8852fd4126e6
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsAsmPrinter
+  MipsInstPrinter.cpp
+  )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile
new file mode 100644
index 000000000000..63e38ef3e6aa
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmPrinter
+
+# Hack: we need to include 'main' Mips target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
new file mode 100644
index 000000000000..41c1dd3919b4
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -0,0 +1,127 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MipsInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "MipsGenAsmWriter.inc"
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+  switch (CC) {
+  case FCOND_F:
+  case FCOND_T:   return "f";
+  case FCOND_UN:
+  case FCOND_OR:  return "un";
+  case FCOND_OEQ:
+  case FCOND_UNE: return "eq";
+  case FCOND_UEQ:
+  case FCOND_ONE: return "ueq";
+  case FCOND_OLT:
+  case FCOND_UGE: return "olt";
+  case FCOND_ULT:
+  case FCOND_OGE: return "ult";
+  case FCOND_OLE:
+  case FCOND_UGT: return "ole";
+  case FCOND_ULE:
+  case FCOND_OGT: return "ule";
+  case FCOND_SF:
+  case FCOND_ST:  return "sf";
+  case FCOND_NGLE:
+  case FCOND_GLE: return "ngle";
+  case FCOND_SEQ:
+  case FCOND_SNE: return "seq";
+  case FCOND_NGL:
+  case FCOND_GL:  return "ngl";
+  case FCOND_LT:
+  case FCOND_NLT: return "lt";
+  case FCOND_NGE:
+  case FCOND_GE:  return "nge";
+  case FCOND_LE:
+  case FCOND_NLE: return "le";
+  case FCOND_NGT:
+  case FCOND_GT:  return "ngt";
+  }
+  llvm_unreachable("Impossible condition code!");
+}
+
+StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << '$' << LowercaseString(getRegisterName(RegNo));
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  printInstruction(MI, O);
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegName(O, Op.getReg());
+    return;
+  }
+  
+  if (Op.isImm()) {
+    O << Op.getImm();
+    return;
+  }
+  
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  O << *Op.getExpr();
+}
+
+void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+                                       raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (unsigned short int)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  // Load/store memory operands are printed as imm($reg): operand opNum+1
+  // first, then operand opNum inside the parentheses.
+  // For PIC targets the address is loaded as: lw $25,%call16($28)
+  printOperand(MI, opNum+1, O);
+  O << "(";
+  printOperand(MI, opNum, O);
+  O << ")";
+}
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+  // When a stack location is used by a non-load/store instruction, print
+  // operands opNum and opNum+1 comma-separated, like a normal 3-operand one.
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+  return;
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand& MO = MI->getOperand(opNum);
+  O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
new file mode 100644
index 000000000000..680208eb819b
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -0,0 +1,100 @@
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTPRINTER_H
+#define MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were originally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+  BRANCH_F,
+  BRANCH_T,
+  BRANCH_FL,
+  BRANCH_TL,
+  BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+  // To be used with float branch True
+  FCOND_F,
+  FCOND_UN,
+  FCOND_OEQ,
+  FCOND_UEQ,
+  FCOND_OLT,
+  FCOND_ULT,
+  FCOND_OLE,
+  FCOND_ULE,
+  FCOND_SF,
+  FCOND_NGLE,
+  FCOND_SEQ,
+  FCOND_NGL,
+  FCOND_LT,
+  FCOND_NGE,
+  FCOND_LE,
+  FCOND_NGT,
+
+  // To be used with float branch False
+  // These conditions have the same mnemonics as the
+  // ones above, but are used with a branch False.
+  FCOND_T,
+  FCOND_OR,
+  FCOND_UNE,
+  FCOND_ONE,
+  FCOND_UGE,
+  FCOND_OGE,
+  FCOND_UGT,
+  FCOND_OGT,
+  FCOND_ST,
+  FCOND_GLE,
+  FCOND_SNE,
+  FCOND_GL,
+  FCOND_NLT,
+  FCOND_GE,
+  FCOND_NLE,
+  FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class TargetMachine;
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+  MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+  
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getInstructionName(unsigned Opcode);
+  static const char *getRegisterName(unsigned RegNo);
+  
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+  virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+  virtual void printInst(const MCInst *MI, raw_ostream &O);
+  
+private:
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+  void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+  void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+  void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..97de75db5347
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMipsDesc
+  MipsMCTargetDesc.cpp
+  MipsMCAsmInfo.cpp
+  )
diff --git a/lib/Target/Mips/MCTargetDesc/Makefile b/lib/Target/Mips/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..7fe2086a6e00
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index c86bf405b8e9..5d9242500f6d 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -12,11 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
 using namespace llvm;
 
 MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
+  Triple TheTriple(TT);
+  if (TheTriple.getArch() == Triple::mips)
+    IsLittleEndian = false;
+
   AlignmentIsInBytes          = false;
-  Data16bitsDirective         = "\t.half\t";
+  Data16bitsDirective         = "\t.2byte\t";
   Data32bitsDirective         = "\t.4byte\t";
   Data64bitsDirective         = 0;
   PrivateGlobalPrefix         = "$";
@@ -28,4 +34,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
   SupportsDebugInformation = true;
   ExceptionsType = ExceptionHandling::DwarfCFI;
   HasLEB128 = true;
+  DwarfRegNumForCFI = true;
 }
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 41b719207b7b..41b719207b7b 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
new file mode 100644
index 000000000000..06f0d0bfb6b9
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCTargetDesc.h"
+#include "MipsMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MipsGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMipsMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitMipsMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeMipsMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                  StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitMipsMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeMipsMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
+                                          createMipsMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMipsMCAsmInfo() {
+  RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget);
+  RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget);
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
new file mode 100644
index 000000000000..3d18f114c8bd
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMipsTarget;
+extern Target TheMipselTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Mips registers.  This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MipsGenRegisterInfo.inc"
+
+// Defines symbolic names for the Mips instructions.
+#define GET_INSTRINFO_ENUM
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MipsGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index d16b066a624e..cc4a8aef224a 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMMipsCodeGen
 TARGET = Mips
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
-                MipsGenRegisterInfo.inc MipsGenInstrNames.inc \
-                MipsGenInstrInfo.inc MipsGenAsmWriter.inc \
+BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
+                MipsGenAsmWriter.inc \
                 MipsGenDAGISel.inc MipsGenCallingConv.inc \
-                MipsGenSubtarget.inc
+                MipsGenSubtargetInfo.inc
 
-DIRS = TargetInfo
+DIRS = InstPrinter TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 76a26a9ba581..984b5adfc5f3 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_MIPS_H
 #define TARGET_MIPS_H
 
+#include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -28,16 +29,6 @@ namespace llvm {
   FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
   FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
 
-  extern Target TheMipsTarget;
-  extern Target TheMipselTarget;
-
 } // end namespace llvm;
 
-// Defines symbolic names for Mips registers.  This defines a mapping from
-// register name to register number.
-#include "MipsGenRegisterNames.inc"
-
-// Defines symbolic names for the Mips instructions.
-#include "MipsGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index b79016d788f0..433cd57f34e0 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -88,6 +88,14 @@ def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
       FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
       FeatureMinMax, FeatureSwap, FeatureBitCount]>;
 
+def MipsAsmWriter : AsmWriter {
+  string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
 def Mips : Target {
   let InstructionSet = MipsInstrInfo;
+
+  let AssemblyWriters = [MipsAsmWriter];
 }
+
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8caa7cd2f754..69e03bd29724 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,80 +13,49 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mips-asm-printer"
+#include "MipsAsmPrinter.h"
 #include "Mips.h"
-#include "MipsSubtarget.h"
 #include "MipsInstrInfo.h"
-#include "MipsTargetMachine.h"
 #include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
+#include "InstPrinter/MipsInstPrinter.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
-#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-  class MipsAsmPrinter : public AsmPrinter {
-    const MipsSubtarget *Subtarget;
-  public:
-    explicit MipsAsmPrinter(TargetMachine &TM,  MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer) {
-      Subtarget = &TM.getSubtarget<MipsSubtarget>();
-    }
+#include "llvm/Analysis/DebugInfo.h"
 
-    virtual const char *getPassName() const {
-      return "Mips Assembly Printer";
-    }
+using namespace llvm;
 
-    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                         unsigned AsmVariant, const char *ExtraCode,
-                         raw_ostream &O);
-    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
-    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                         const char *Modifier = 0);
-    void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                         const char *Modifier = 0);
-    void printSavedRegsBitmask(raw_ostream &O);
-    void printHex32(unsigned int Value, raw_ostream &O);
-
-    const char *getCurrentABIString() const;
-    void emitFrameDirective();
-
-    void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
-    void EmitInstruction(const MachineInstr *MI) {
-      SmallString<128> Str;
-      raw_svector_ostream OS(Str);
-      printInstruction(MI, OS);
-      OutStreamer.EmitRawText(OS.str());
-    }
-    virtual void EmitFunctionBodyStart();
-    virtual void EmitFunctionBodyEnd();
-    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
-                                                   MBB) const;
-    static const char *getRegisterName(unsigned RegNo);
+void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
 
-    virtual void EmitFunctionEntryLabel();
-    void EmitStartOfAsmFile(Module &M);
-  };
-} // end of anonymous namespace
+  if (MI->isDebugValue()) {
+    PrintDebugValueComment(MI, OS);
+    return;
+  }
 
-#include "MipsGenAsmWriter.inc"
+  MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+  MCInst TmpInst0;
+  MCInstLowering.Lower(MI, TmpInst0);
+  OutStreamer.EmitInstruction(TmpInst0);
+}
 
 //===----------------------------------------------------------------------===//
 //
@@ -202,9 +171,9 @@ void MipsAsmPrinter::emitFrameDirective() {
   unsigned stackSize = MF->getFrameInfo()->getStackSize();
 
   OutStreamer.EmitRawText("\t.frame\t$" +
-                          Twine(LowercaseString(getRegisterName(stackReg))) +
-                          "," + Twine(stackSize) + ",$" +
-                          Twine(LowercaseString(getRegisterName(returnReg))));
+           Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+           "," + Twine(stackSize) + ",$" +
+           Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
 }
 
 /// Emit Set directives.
@@ -304,6 +273,19 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   return false;
 }
 
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                           unsigned OpNum, unsigned AsmVariant,
+                                           const char *ExtraCode,
+                                           raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+     return true; // Unknown modifier.
+   
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isReg() && "unexpected inline asm memory operand");
+  O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+  return false;
+}
+
 void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                   raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand(opNum);
@@ -326,7 +308,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
 
   switch (MO.getType()) {
     case MachineOperand::MO_Register:
-      O << '$' << LowercaseString(getRegisterName(MO.getReg()));
+      O << '$'
+        << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
       break;
 
     case MachineOperand::MO_Immediate:
@@ -380,27 +363,27 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
 }
 
 void MipsAsmPrinter::
-printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                const char *Modifier) {
-  // when using stack locations for not load/store instructions
-  // print the same way as all normal 3 operand instructions.
-  if (Modifier && !strcmp(Modifier, "stackloc")) {
-    printOperand(MI, opNum+1, O);
-    O << ", ";
-    printOperand(MI, opNum, O);
-    return;
-  }
-
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
   // Load/Store memory operands -- imm($reg)
   // If PIC target the target is loaded as the
   // pattern lw $25,%call16($28)
-  printOperand(MI, opNum, O);
-  O << "(";
   printOperand(MI, opNum+1, O);
+  O << "(";
+  printOperand(MI, opNum, O);
   O << ")";
 }
 
 void MipsAsmPrinter::
+printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
+  // when using stack locations for not load/store instructions
+  // print the same way as all normal 3 operand instructions.
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+  return;
+}
+
+void MipsAsmPrinter::
 printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                 const char *Modifier) {
   const MachineOperand& MO = MI->getOperand(opNum);
@@ -425,8 +408,33 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
   OutStreamer.EmitRawText(StringRef("\t.previous"));
 }
 
+MachineLocation
+MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+  // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue.
+  assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+         "Unexpected MachineOperand types");
+  return MachineLocation(MI->getOperand(0).getReg(),
+                         MI->getOperand(1).getImm());
+}
+
+void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+                                           raw_ostream &OS) {
+  // TODO: implement
+}
+
 // Force static initialization.
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+                                              unsigned SyntaxVariant,
+                                              const MCAsmInfo &MAI) {
+  return new MipsInstPrinter(MAI);
+}
+
 extern "C" void LLVMInitializeMipsAsmPrinter() {
   RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
   RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+
+  TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter);
+  TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+                                        createMipsMCInstPrinter);
 }
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
new file mode 100644
index 000000000000..16461ff1fbb0
--- /dev/null
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -0,0 +1,71 @@
+//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mips Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSASMPRINTER_H
+#define MIPSASMPRINTER_H
+
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineInstr;
+class raw_ostream;
+class MachineBasicBlock;
+class Module;
+
+class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+  const MipsSubtarget *Subtarget;
+  
+public:
+  explicit MipsAsmPrinter(TargetMachine &TM,  MCStreamer &Streamer)
+    : AsmPrinter(TM, Streamer) {
+    Subtarget = &TM.getSubtarget<MipsSubtarget>();
+  }
+
+  virtual const char *getPassName() const {
+    return "Mips Assembly Printer";
+  }
+
+  void EmitInstruction(const MachineInstr *MI);
+  void printSavedRegsBitmask(raw_ostream &O);
+  void printHex32(unsigned int Value, raw_ostream &O);
+  void emitFrameDirective();
+  const char *getCurrentABIString() const;
+  virtual void EmitFunctionEntryLabel();
+  virtual void EmitFunctionBodyStart();
+  virtual void EmitFunctionBodyEnd();
+  virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+                                                 MBB) const;
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       unsigned AsmVariant, const char *ExtraCode,
+                       raw_ostream &O);
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                             unsigned AsmVariant, const char *ExtraCode,
+                             raw_ostream &O);
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+  void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+  void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
+  void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                       const char *Modifier = 0);
+  void EmitStartOfAsmFile(Module &M);
+  virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+  void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+};
+}
+
+#endif
+
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 57aeb1d2793c..876f0fcc83ea 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -20,8 +20,8 @@ class CCIfSubtarget<string F, CCAction A>:
 // Only the return rules are defined here for O32. The rules for argument
 // passing are defined in MipsISelLowering.cpp.
 def RetCC_MipsO32 : CallingConv<[
-  // i32 are returned in registers V0, V1
-  CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+  // i32 are returned in registers V0, V1, A0, A1
+  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
 
   // f32 are returned in registers F0, F2
   CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index b44a0af2d436..c3a6211399cd 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -59,10 +59,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB)
 {
   bool Changed = false;
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
-    const TargetInstrDesc& Tid = I->getDesc();
-    if (Tid.hasDelaySlot() &&
+    const MCInstrDesc& MCid = I->getDesc();
+    if (MCid.hasDelaySlot() &&
         (TM.getSubtarget<MipsSubtarget>().isMips1() ||
-         Tid.isCall() || Tid.isBranch() || Tid.isReturn())) {
+         MCid.isCall() || MCid.isBranch() || MCid.isReturn())) {
       MachineBasicBlock::iterator J = I;
       ++J;
       BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp
index f49d490565ff..03d922fe7cd6 100644
--- a/lib/Target/Mips/MipsEmitGPRestore.cpp
+++ b/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -64,8 +64,8 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
       // Insert lw.
       ++I;
       DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
-      BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addImm(0)
-                                                       .addFrameIndex(FI);
+      BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+                                                       .addImm(0);
       Changed = true;
     }
 
@@ -77,8 +77,8 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
 
       DebugLoc dl = I->getDebugLoc();
       // emit lw $gp, ($gp save slot on stack) after jalr
-      BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addImm(0)
-        .addFrameIndex(FI);
+      BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+                                                         .addImm(0);
       Changed = true;
     }
   } 
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index 4423f5147980..a622258a4dcb 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -61,9 +61,9 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
 
   bool Changed = false;
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
-    const TargetInstrDesc& Tid = I->getDesc();
+    const MCInstrDesc& MCid = I->getDesc();
 
-    switch(Tid.getOpcode()) {
+    switch(MCid.getOpcode()) {
     default: 
       ++I;
       continue;
@@ -87,7 +87,7 @@ void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
                                             MachineBasicBlock::iterator I) {  
   unsigned DstReg = I->getOperand(0).getReg();
   unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
-  const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+  const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
   DebugLoc dl = I->getDebugLoc();
   const unsigned* SubReg =
     TM.getRegisterInfo()->getSubRegisters(DstReg);
@@ -103,7 +103,7 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
   unsigned DstReg = I->getOperand(0).getReg();
   unsigned SrcReg = I->getOperand(1).getReg();
   unsigned N = I->getOperand(2).getImm();
-  const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+  const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
   DebugLoc dl = I->getDebugLoc();
   const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
 
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index d8a84ce52991..90aaeb60d06f 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -94,6 +94,10 @@ private:
   inline SDValue getI32Imm(unsigned Imm) {
     return CurDAG->getTargetConstant(Imm, MVT::i32);
   }
+
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps);
 };
 
 }
@@ -109,7 +113,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
 /// ComplexPattern used on MipsInstrInfo
 /// Used on Mips Load/Store instructions
 bool MipsDAGToDAGISel::
-SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
+SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
   // if Address is FI, get the TargetFrameIndex.
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base   = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -166,7 +170,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
          Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
         Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
       SDValue LoVal = Addr.getOperand(1);
-      if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) {
+      if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || 
+          isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
         Base = Addr.getOperand(0);
         Offset = LoVal.getOperand(0);
         return true;
@@ -195,7 +200,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
   SDValue N1 = N->getOperand(1);
   SDValue Offset0, Offset1, Base;
 
-  if (!SelectAddr(N1, Offset0, Base) ||
+  if (!SelectAddr(N1, Base, Offset0) ||
       N1.getValueType() != MVT::i32)
     return NULL;
 
@@ -225,14 +230,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
   //    lwc $f0, X($3)
   //    lwc $f1, X+4($3)
   SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
-                                    MVT::Other, Offset0, Base, Chain);
+                                       MVT::Other, Base, Offset0, Chain);
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
   SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
                             MVT::f64, Undef, SDValue(LD0, 0));
 
   SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
-                          MVT::Other, Offset1, Base, SDValue(LD0, 1));
+                                       MVT::Other, Base, Offset1, SDValue(LD0, 1));
   SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
                             MVT::f64, I0, SDValue(LD1, 0));
 
@@ -259,7 +264,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
   SDValue N2 = N->getOperand(2);
   SDValue Offset0, Offset1, Base;
 
-  if (!SelectAddr(N2, Offset0, Base) ||
+  if (!SelectAddr(N2, Base, Offset0) ||
       N1.getValueType() != MVT::f64 ||
       N2.getValueType() != MVT::i32)
     return NULL;
@@ -289,12 +294,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
   // Generate:
   //    swc $f0, X($3)
   //    swc $f1, X+4($3)
-  SDValue Ops0[] = { FPEven, Offset0, Base, Chain };
+  SDValue Ops0[] = { FPEven, Base, Offset0, Chain };
   Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
                                        MVT::Other, Ops0, 4), 0);
   cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
 
-  SDValue Ops1[] = { FPOdd, Offset1, Base, Chain };
+  SDValue Ops1[] = { FPOdd, Base, Offset1, Chain };
   Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
                                        MVT::Other, Ops1, 4), 0);
   cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -462,6 +467,14 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   return ResNode;
 }
 
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+  OutOps.push_back(Op);  // append the operand itself, unmodified
+  return false;  // NOTE(review): presumably false signals success; confirm
+}
+
 /// createMipsISelDag - This pass converts a legalized DAG into a
 /// MIPS-specific DAG, ready for instruction scheduling.
 FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index fd90731f50d2..b4f4b1b4bf04 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
+#include "InstPrinter/MipsInstPrinter.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -59,6 +60,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::BuildPairF64:      return "MipsISD::BuildPairF64";
   case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
   case MipsISD::WrapperPIC:        return "MipsISD::WrapperPIC";
+  case MipsISD::DynAlloc:          return "MipsISD::DynAlloc";
   default:                         return NULL;
   }
 }
@@ -144,6 +146,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::FLOG2,             MVT::f32,   Expand);
   setOperationAction(ISD::FLOG10,            MVT::f32,   Expand);
   setOperationAction(ISD::FEXP,              MVT::f32,   Expand);
+  setOperationAction(ISD::FMA,               MVT::f32,   Expand);
+  setOperationAction(ISD::FMA,               MVT::f64,   Expand);
 
   setOperationAction(ISD::EXCEPTIONADDR,     MVT::i32, Expand);
   setOperationAction(ISD::EHSELECTION,       MVT::i32, Expand);
@@ -773,7 +777,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
     }
 
     BuildMI(BB, dl, TII->get(Mips::SW))
-        .addReg(Incr).addImm(0).addFrameIndex(fi);
+        .addReg(Incr).addFrameIndex(fi).addImm(0);
   }
   BB->addSuccessor(loopMBB);
 
@@ -784,7 +788,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   //    sc tmp1, 0(ptr)
   //    beq tmp1, $0, loopMBB
   BB = loopMBB;
-  BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
   if (Nand) {
     //  and tmp2, oldval, incr
@@ -797,10 +801,10 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   } else {
     //  lw tmp2, fi(sp)              // load incr from stack
     //  or tmp1, $zero, tmp2
-    BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);;
+    BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
     BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
   }
-  BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::BEQ))
     .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
   BB->addSuccessor(loopMBB);
@@ -909,7 +913,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
     }
 
     BuildMI(BB, dl, TII->get(Mips::SW))
-        .addReg(Incr2).addImm(0).addFrameIndex(fi);
+        .addReg(Incr2).addFrameIndex(fi).addImm(0);
   }
   BB->addSuccessor(loopMBB);
 
@@ -922,7 +926,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
   //   sc      tmp9,0(addr)
   //   beq     tmp9,$0,loopMBB
   BB = loopMBB;
-  BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Addr);
+  BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
   if (Nand) {
     //  and tmp6, oldval, incr2
     //  nor tmp7, $0, tmp6
@@ -937,13 +941,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
   } else {
     //  lw tmp6, fi(sp)              // load incr2 from stack
     //  or tmp7, $zero, tmp6
-    BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addImm(0).addFrameIndex(fi);;
+    BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
     BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
   }
   BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
   BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
   BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
-  BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addImm(0).addReg(Addr);
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::BEQ))
       .addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
   BB->addSuccessor(loopMBB);
@@ -1026,14 +1030,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   // hoist "or" instruction out of the block loop2MBB.
 
   BuildMI(BB, dl, TII->get(Mips::SW))
-      .addReg(Newval).addImm(0).addFrameIndex(fi);
+      .addReg(Newval).addFrameIndex(fi).addImm(0);
   BB->addSuccessor(loop1MBB);
 
   // loop1MBB:
   //   ll dest, 0(ptr)
   //   bne dest, oldval, exitMBB
   BB = loop1MBB;
-  BuildMI(BB, dl, TII->get(Mips::LL), Dest).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::BNE))
     .addReg(Dest).addReg(Oldval).addMBB(exitMBB);
   BB->addSuccessor(exitMBB);
@@ -1045,9 +1049,9 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   //   sc tmp1, 0(ptr)
   //   beq tmp1, $0, loop1MBB
   BB = loop2MBB;
-  BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);;
+  BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
-  BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
+  BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::BEQ))
     .addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
   BB->addSuccessor(loop1MBB);
@@ -1142,7 +1146,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
   //    and     oldval4,oldval3,mask
   //    bne     oldval4,oldval2,exitMBB
   BB = loop1MBB;
-  BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addImm(0).addReg(Addr);
+  BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
   BuildMI(BB, dl, TII->get(Mips::BNE))
       .addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
@@ -1158,7 +1162,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
   BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
   BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
   BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
-      .addReg(Tmp7).addImm(0).addReg(Addr);
+      .addReg(Tmp7).addReg(Addr).addImm(0);
   BuildMI(BB, dl, TII->get(Mips::BEQ))
       .addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
   BB->addSuccessor(loop1MBB);
@@ -1189,9 +1193,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
 SDValue MipsTargetLowering::
 LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
 {
-  unsigned StackAlignment =
-    getTargetMachine().getFrameLowering()->getStackAlignment();
-  assert(StackAlignment >=
+  MachineFunction &MF = DAG.getMachineFunction();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+  assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
          cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
          "Cannot lower if the alignment of the allocated space is larger than \
           that of the stack.");
@@ -1211,24 +1216,14 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
   // must be placed in the stack pointer register.
   Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
                            SDValue());
-  // Retrieve updated $sp. There is a glue input to prevent instructions that
-  // clobber $sp from being inserted between copytoreg and copyfromreg.
-  SDValue NewSP = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32,
-                                     Chain.getValue(1));
-
-  // The stack space reserved by alloca is located right above the argument
-  // area. It is aligned on a boundary that is a multiple of StackAlignment.
-  MachineFunction &MF = DAG.getMachineFunction();
-  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-  unsigned SPOffset = (MipsFI->getMaxCallFrameSize() + StackAlignment - 1) /
-                      StackAlignment * StackAlignment;
-  SDValue AllocPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
-                                 DAG.getConstant(SPOffset, MVT::i32));
 
   // This node always has two return values: a new stack pointer
   // value and a chain
-  SDValue Ops[2] = { AllocPtr, NewSP.getValue(1) };
-  return DAG.getMergeValues(Ops, 2, dl);
+  SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+  SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+  SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+  return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
 }
 
 SDValue MipsTargetLowering::
@@ -1358,7 +1353,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
     // General Dynamic TLS Model
     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
-                                                 0, MipsII::MO_TLSGD);
+                                             0, MipsII::MO_TLSGD);
     SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
     SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
     SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
@@ -1370,36 +1365,36 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
     Args.push_back(Entry);
     std::pair<SDValue, SDValue> CallResult =
         LowerCallTo(DAG.getEntryNode(),
-                 (const Type *) Type::getInt32Ty(*DAG.getContext()),
-                 false, false, false, false,
-                 0, CallingConv::C, false, true,
-                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+                    (const Type *) Type::getInt32Ty(*DAG.getContext()),
+                    false, false, false, false, 0, CallingConv::C, false, true,
+                    DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+                    dl);
 
     return CallResult.first;
-  } else {
-    SDValue Offset;
-    if (GV->isDeclaration()) {
-      // Initial Exec TLS Model
-      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                              MipsII::MO_GOTTPREL);
-      Offset = DAG.getLoad(MVT::i32, dl,
-                                  DAG.getEntryNode(), TGA, MachinePointerInfo(),
-                                  false, false, 0);
-    } else {
-      // Local Exec TLS Model
-      SDVTList VTs = DAG.getVTList(MVT::i32);
-      SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                              MipsII::MO_TPREL_HI);
-      SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
-                                              MipsII::MO_TPREL_LO);
-      SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
-      SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
-      Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
-    }
+  }
 
-    SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
-    return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+  SDValue Offset;
+  if (GV->isDeclaration()) {
+    // Initial Exec TLS Model
+    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                             MipsII::MO_GOTTPREL);
+    Offset = DAG.getLoad(MVT::i32, dl,
+                         DAG.getEntryNode(), TGA, MachinePointerInfo(),
+                         false, false, 0);
+  } else {
+    // Local Exec TLS Model
+    SDVTList VTs = DAG.getVTList(MVT::i32);
+    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                               MipsII::MO_TPREL_HI);
+    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+                                               MipsII::MO_TPREL_LO);
+    SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+    SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+    Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
   }
+
+  SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
 }
 
 SDValue MipsTargetLowering::
@@ -1550,8 +1545,8 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
 
 SDValue MipsTargetLowering::
 LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
-  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  assert((Depth == 0) &&
+  // check the depth
+  assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
          "Frame address can only be determined for current frame.");
 
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -1770,6 +1765,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (IsPIC && !MipsFI->getGPFI())
     MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
 
+  // Get the frame index of the stack frame object that points to the location
+  // of dynamically allocated area on the stack.
+  int DynAllocFI = MipsFI->getDynAllocFI();
+
   // Update size of the maximum argument space.
   // For O32, a minimum of four words (16 bytes) of argument space is
   // allocated.
@@ -1781,14 +1780,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (MaxCallFrameSize < NextStackOffset) {
     MipsFI->setMaxCallFrameSize(NextStackOffset);
 
-    if (IsPIC) {
-      // $gp restore slot must be aligned.
-      unsigned StackAlignment = TFL->getStackAlignment();
-      NextStackOffset = (NextStackOffset + StackAlignment - 1) /
-                        StackAlignment * StackAlignment;
-      int GPFI = MipsFI->getGPFI();
-      MFI->setObjectOffset(GPFI, NextStackOffset);
-    }
+    // Set the offsets relative to $sp of the $gp restore slot and dynamically
+    // allocated stack space. These offsets must be aligned to a boundary
+    // determined by the stack alignment of the ABI.
+    unsigned StackAlignment = TFL->getStackAlignment();
+    NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+                      StackAlignment * StackAlignment;
+
+    if (IsPIC)
+      MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+    MFI->setObjectOffset(DynAllocFI, NextStackOffset);
   }
 
   // With EABI is it possible to have 16 args on registers.
@@ -1912,7 +1914,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     if (LoadSymAddr) {
       // Load callee address
       Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
-      SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee,
+      SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
                                       MachinePointerInfo::getGOT(),
                                       false, false, 0);
 
@@ -1922,9 +1924,6 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
       } else
         Callee = LoadValue;
-
-      // Use chain output from LoadValue
-      Chain = LoadValue.getValue(1);
     }
 
     // copy to T9
@@ -1965,7 +1964,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   InFlag = Chain.getValue(1);
 
   // Create the CALLSEQ_END node.
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true),
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getIntPtrConstant(NextStackOffset, true),
                              DAG.getIntPtrConstant(0, true), InFlag);
   InFlag = Chain.getValue(1);
 
@@ -2332,14 +2332,16 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
   return weight;
 }
 
-/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
-/// return a list of registers that can be used to satisfy the constraint.
-/// This should only be used for C_RegisterClass constraints.
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
 std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
 getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
 {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
+    case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
+    case 'y': // Same as 'r'. Exists for compatibility.
     case 'r':
       return std::make_pair(0U, Mips::CPURegsRegisterClass);
     case 'f':
@@ -2348,55 +2350,12 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
       if (VT == MVT::f64)
         if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
           return std::make_pair(0U, Mips::AFGR64RegisterClass);
+      break;
     }
   }
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-/// Given a register class constraint, like 'r', if this corresponds directly
-/// to an LLVM register class, return a register of 0 and the register class
-/// pointer.
-std::vector<unsigned> MipsTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const
-{
-  if (Constraint.size() != 1)
-    return std::vector<unsigned>();
-
-  switch (Constraint[0]) {
-    default : break;
-    case 'r':
-    // GCC Mips Constraint Letters
-    case 'd':
-    case 'y':
-      return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
-             Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
-             Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
-             Mips::T8, 0);
-
-    case 'f':
-      if (VT == MVT::f32) {
-        if (Subtarget->isSingleFloat())
-          return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5,
-                 Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11,
-                 Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24,
-                 Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
-                 Mips::F30, Mips::F31, 0);
-        else
-          return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
-                 Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
-                 Mips::F28, Mips::F30, 0);
-      }
-
-      if (VT == MVT::f64)
-        if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
-          return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
-                 Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
-                 Mips::D14, Mips::D15, 0);
-  }
-  return std::vector<unsigned>();
-}
-
 bool
 MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The Mips target isn't yet aware of offsets.
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index fbcedfddf99a..bda26a229e72 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -79,7 +79,9 @@ namespace llvm {
       BuildPairF64,
       ExtractElementF64,
 
-      WrapperPIC
+      WrapperPIC,
+
+      DynAlloc
     };
   }
 
@@ -167,10 +169,6 @@ namespace llvm {
               getRegForInlineAsmConstraint(const std::string &Constraint,
               EVT VT) const;
 
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-              EVT VT) const;
-
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
     /// isFPImmLegal - Returns true if the target can instruction select the
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index be044fa1f3b3..0a7a7f2dfe4e 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -14,18 +14,27 @@
 #include "MipsInstrInfo.h"
 #include "MipsTargetMachine.h"
 #include "MipsMachineFunction.h"
-#include "llvm/ADT/STLExtras.h"
+#include "InstPrinter/MipsInstPrinter.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
 #include "MipsGenInstrInfo.inc"
 
 using namespace llvm;
 
 MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
-  : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)),
+  : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
     TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
 
+
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+  return RI;  // the MipsRegisterInfo member embedded in this MipsInstrInfo
+}
+
 static bool isZeroImm(const MachineOperand &op) {
   return op.isImm() && op.getImm() == 0;
 }
@@ -40,10 +49,10 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
 {
   if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
       (MI->getOpcode() == Mips::LDC1)) {
-    if ((MI->getOperand(2).isFI()) && // is a stack slot
-        (MI->getOperand(1).isImm()) &&  // the imm is zero
-        (isZeroImm(MI->getOperand(1)))) {
-      FrameIndex = MI->getOperand(2).getIndex();
+    if ((MI->getOperand(1).isFI()) && // is a stack slot
+        (MI->getOperand(2).isImm()) &&  // the imm is zero
+        (isZeroImm(MI->getOperand(2)))) {
+      FrameIndex = MI->getOperand(1).getIndex();
       return MI->getOperand(0).getReg();
     }
   }
@@ -61,10 +70,10 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
 {
   if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
       (MI->getOpcode() == Mips::SDC1)) {
-    if ((MI->getOperand(2).isFI()) && // is a stack slot
-        (MI->getOperand(1).isImm()) &&  // the imm is zero
-        (isZeroImm(MI->getOperand(1)))) {
-      FrameIndex = MI->getOperand(2).getIndex();
+    if ((MI->getOperand(1).isFI()) && // is a stack slot
+        (MI->getOperand(2).isImm()) &&  // the imm is zero
+        (isZeroImm(MI->getOperand(2)))) {
+      FrameIndex = MI->getOperand(1).getIndex();
       return MI->getOperand(0).getReg();
     }
   }
@@ -161,25 +170,25 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 
   if (RC == Mips::CPURegsRegisterClass)
     BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
-          .addImm(0).addFrameIndex(FI);
+                                      .addFrameIndex(FI).addImm(0);
   else if (RC == Mips::FGR32RegisterClass)
     BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
-          .addImm(0).addFrameIndex(FI);
+                                        .addFrameIndex(FI).addImm(0);
   else if (RC == Mips::AFGR64RegisterClass) {
     if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
       BuildMI(MBB, I, DL, get(Mips::SDC1))
         .addReg(SrcReg, getKillRegState(isKill))
-        .addImm(0).addFrameIndex(FI);
+        .addFrameIndex(FI).addImm(0);
     } else {
       const TargetRegisterInfo *TRI =
         MBB.getParent()->getTarget().getRegisterInfo();
       const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
       BuildMI(MBB, I, DL, get(Mips::SWC1))
         .addReg(SubSet[0], getKillRegState(isKill))
-        .addImm(0).addFrameIndex(FI);
+        .addFrameIndex(FI).addImm(0);
       BuildMI(MBB, I, DL, get(Mips::SWC1))
         .addReg(SubSet[1], getKillRegState(isKill))
-        .addImm(4).addFrameIndex(FI);
+        .addFrameIndex(FI).addImm(4);
     }
   } else
     llvm_unreachable("Register class not handled!");
@@ -195,25 +204,34 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == Mips::CPURegsRegisterClass)
-    BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
+    BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0);
   else if (RC == Mips::FGR32RegisterClass)
-    BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI);
+    BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0);
   else if (RC == Mips::AFGR64RegisterClass) {
     if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
-      BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI);
+      BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0);
     } else {
       const TargetRegisterInfo *TRI =
         MBB.getParent()->getTarget().getRegisterInfo();
       const unsigned *SubSet = TRI->getSubRegisters(DestReg);
       BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
-        .addImm(0).addFrameIndex(FI);
+        .addFrameIndex(FI).addImm(0);
       BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
-        .addImm(4).addFrameIndex(FI);
+        .addFrameIndex(FI).addImm(4);
     }
   } else
     llvm_unreachable("Register class not handled!");
 }
 
+MachineInstr*  // DBG_VALUE operands: frame index, imm 0, Offset, metadata
+MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+                                        uint64_t Offset, const MDNode *MDPtr,
+                                        DebugLoc DL) const {
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE))
+    .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+  return &*MIB;  // hand back the DBG_VALUE instruction built above
+}
+
 //===----------------------------------------------------------------------===//
 // Branch Analysis
 //===----------------------------------------------------------------------===//
@@ -341,8 +359,8 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
                                 const SmallVectorImpl<MachineOperand>& Cond)
   const {
   unsigned Opc = Cond[0].getImm();
-  const TargetInstrDesc &TID = get(Opc);
-  MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID);
+  const MCInstrDesc &MCID = get(Opc);
+  MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
 
   for (unsigned i = 1; i < Cond.size(); ++i)
     MIB.addReg(Cond[i].getReg());
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index abf67733f083..4421c4862fa0 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -19,103 +19,15 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "MipsRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "MipsGenInstrInfo.inc"
+
 namespace llvm {
 
 namespace Mips {
-
-  // Mips Branch Codes
-  enum FPBranchCode {
-    BRANCH_F,
-    BRANCH_T,
-    BRANCH_FL,
-    BRANCH_TL,
-    BRANCH_INVALID
-  };
-
-  // Mips Condition Codes
-  enum CondCode {
-    // To be used with float branch True
-    FCOND_F,
-    FCOND_UN,
-    FCOND_OEQ,
-    FCOND_UEQ,
-    FCOND_OLT,
-    FCOND_ULT,
-    FCOND_OLE,
-    FCOND_ULE,
-    FCOND_SF,
-    FCOND_NGLE,
-    FCOND_SEQ,
-    FCOND_NGL,
-    FCOND_LT,
-    FCOND_NGE,
-    FCOND_LE,
-    FCOND_NGT,
-
-    // To be used with float branch False
-    // This conditions have the same mnemonic as the
-    // above ones, but are used with a branch False;
-    FCOND_T,
-    FCOND_OR,
-    FCOND_UNE,
-    FCOND_ONE,
-    FCOND_UGE,
-    FCOND_OGE,
-    FCOND_UGT,
-    FCOND_OGT,
-    FCOND_ST,
-    FCOND_GLE,
-    FCOND_SNE,
-    FCOND_GL,
-    FCOND_NLT,
-    FCOND_GE,
-    FCOND_NLE,
-    FCOND_GT
-  };
-
   /// GetOppositeBranchOpc - Return the inverse of the specified
   /// opcode, e.g. turning BEQ to BNE.
   unsigned GetOppositeBranchOpc(unsigned Opc);
-
-  /// MipsCCToString - Map each FP condition code to its string
-  inline static const char *MipsFCCToString(Mips::CondCode CC)
-  {
-    switch (CC) {
-      default: llvm_unreachable("Unknown condition code");
-      case FCOND_F:
-      case FCOND_T:   return "f";
-      case FCOND_UN:
-      case FCOND_OR:  return "un";
-      case FCOND_OEQ:
-      case FCOND_UNE: return "eq";
-      case FCOND_UEQ:
-      case FCOND_ONE: return "ueq";
-      case FCOND_OLT:
-      case FCOND_UGE: return "olt";
-      case FCOND_ULT:
-      case FCOND_OGE: return "ult";
-      case FCOND_OLE:
-      case FCOND_UGT: return "ole";
-      case FCOND_ULE:
-      case FCOND_OGT: return "ule";
-      case FCOND_SF:
-      case FCOND_ST:  return "sf";
-      case FCOND_NGLE:
-      case FCOND_GLE: return "ngle";
-      case FCOND_SEQ:
-      case FCOND_SNE: return "seq";
-      case FCOND_NGL:
-      case FCOND_GL:  return "ngl";
-      case FCOND_LT:
-      case FCOND_NLT: return "lt";
-      case FCOND_NGE:
-      case FCOND_GE:  return "nge";
-      case FCOND_LE:
-      case FCOND_NLE: return "le";
-      case FCOND_NGT:
-      case FCOND_GT:  return "ngt";
-    }
-  }
 }
 
 /// MipsII - This namespace holds all of the target specific flags that
@@ -164,7 +76,7 @@ namespace MipsII {
   };
 }
 
-class MipsInstrInfo : public TargetInstrInfoImpl {
+class MipsInstrInfo : public MipsGenInstrInfo {
   MipsTargetMachine &TM;
   const MipsRegisterInfo RI;
 public:
@@ -174,7 +86,7 @@ public:
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
+  virtual const MipsRegisterInfo &getRegisterInfo() const;
 
   /// isLoadFromStackSlot - If the specified machine instruction is a direct
   /// load from a stack slot, return the virtual or physical register number of
@@ -224,6 +136,11 @@ public:
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
 
+  virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
+                                                 int FrameIx, uint64_t Offset,
+                                                 const MDNode *MDPtr,
+                                                 DebugLoc DL) const;
+
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 329a002667a0..d1a058712459 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -39,6 +39,9 @@ def SDT_MipsDivRem       : SDTypeProfile<0, 2,
 
 def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 
+def SDT_MipsDynAlloc    : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+                                               SDTCisVT<1, iPTR>]>;
+
 // Call
 def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
                          [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
@@ -99,6 +102,10 @@ def MipsDivRemU   : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
 
 def MipsWrapperPIC    : SDNode<"MipsISD::WrapperPIC",  SDTIntUnaryOp>;
 
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc  : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+                           [SDNPHasChain, SDNPInGlue]>;
+
 //===----------------------------------------------------------------------===//
 // Mips Instruction Predicate Definitions.
 //===----------------------------------------------------------------------===//
@@ -127,7 +134,12 @@ def uimm16      : Operand<i32> {
 // Address operand
 def mem : Operand<i32> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops simm16, CPURegs);
+  let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+def mem_ea : Operand<i32> {
+  let PrintMethod = "printMemOperandEA";
+  let MIOperandInfo = (ops CPURegs, simm16);
 }
 
 // Transformation Function - get the lower 16 bits.
@@ -344,7 +356,7 @@ class MoveToLOHI<bits<6> func, string instr_asm>:
      !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
 
 class EffectiveAddress<string instr_asm> :
-  FI<0x09, (outs CPURegs:$dst), (ins mem:$addr),
+  FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr),
      instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
 
 // Count Leading Ones/Zeros in Word
@@ -412,7 +424,7 @@ def ATMACRO   : MipsPseudo<(outs), (ins), ".set\tat", []>;
 // are used, we have the same behavior, but get also a bunch of warnings
 // from the assembler.
 def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc\n", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
 
 let usesCustomInserter = 1 in {
   def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
@@ -673,7 +685,13 @@ let addr=0 in
 // instructions. The same not happens for stack address copies, so an
 // add op with mem ComplexPattern is used and the stack address copy
 // can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
 
 // MADD*/MSUB*
 def MADD  : MArithR<0, "madd", MipsMAdd, 1>;
@@ -852,6 +870,9 @@ def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
 def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
           (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
 
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
 //===----------------------------------------------------------------------===//
 // Floating Point Support
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
new file mode 100644
index 000000000000..f5cc3aa25f1b
--- /dev/null
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -0,0 +1,118 @@
+//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCInstLower.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
+                                 MipsAsmPrinter &asmprinter)
+  : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+
+/// LowerSymbolOperand - Lower a symbolic machine operand (basic block, global,
+/// block address, external symbol, jump table or constant pool index) to an
+/// MCOperand wrapping a MipsMCSymbolRefExpr. MO's target flag selects the
+/// relocation variant; MOTy is MO's operand type as passed by the caller.
+MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+                                              MachineOperandType MOTy) const {
+  MipsMCSymbolRefExpr::VariantKind Kind;
+  const MCSymbol *Symbol;
+  int Offset = 0;
+
+  // An unknown flag is a hard error rather than a silent VK_Mips_None.
+  switch(MO.getTargetFlags()) {
+  default:                  llvm_unreachable("Invalid target flag!");
+  case MipsII::MO_NO_FLAG:  Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
+  case MipsII::MO_GPREL:    Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
+  case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+  case MipsII::MO_GOT:      Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
+  case MipsII::MO_ABS_HI:   Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
+  case MipsII::MO_ABS_LO:   Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
+  case MipsII::MO_TLSGD:    Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
+  case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+  case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+  case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+  }
+
+  // Resolve the symbol the operand refers to, based on the operand type.
+  switch (MOTy) {
+    case MachineOperand::MO_MachineBasicBlock:
+      Symbol = MO.getMBB()->getSymbol();
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      Symbol = Mang->getSymbol(MO.getGlobal());
+      break;
+    case MachineOperand::MO_BlockAddress:
+      Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+      break;
+    case MachineOperand::MO_ExternalSymbol:
+      Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+      // Only constant pool references forward their offset (zero included).
+      Offset = MO.getOffset();
+      break;
+    default:
+      llvm_unreachable("<unknown operand type>");
+  }
+
+  return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
+                                                           Ctx));
+}
+
+/// Lower - Fill OutMI from MI: copy the opcode, then append the lowered form
+/// of every operand except implicit register operands, in operand order.
+void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+  for (unsigned Idx = 0, End = MI->getNumOperands(); Idx != End; ++Idx) {
+    const MachineOperand &Opnd = MI->getOperand(Idx);
+    const MachineOperandType OpndTy = Opnd.getType();
+    MCOperand Lowered;
+
+    switch (OpndTy) {
+    case MachineOperand::MO_Register:
+      // Implicit register operands are not copied into the MCInst.
+      if (Opnd.isImplicit()) continue;
+      Lowered = MCOperand::CreateReg(Opnd.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      Lowered = MCOperand::CreateImm(Opnd.getImm());
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_ExternalSymbol:
+    case MachineOperand::MO_JumpTableIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_BlockAddress:
+      Lowered = LowerSymbolOperand(Opnd, OpndTy);
+      break;
+    default:
+      MI->dump();
+      llvm_unreachable("unknown operand type");
+    }
+    OutMI.addOperand(Lowered);
+  }
+}
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
new file mode 100644
index 000000000000..ec5201be7f6d
--- /dev/null
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -0,0 +1,43 @@
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCINSTLOWER_H
+#define MIPSMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  class MCAsmInfo;
+  class MCContext;
+  class MCInst;
+  class MCOperand;
+  class MCSymbol;
+  class MachineInstr;
+  class MachineFunction;
+  class Mangler;
+  class MipsAsmPrinter;
+  
+/// MipsMCInstLower - This class is used to lower a MachineInstr into an
+///                   MCInst.
+class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
+  typedef MachineOperand::MachineOperandType MachineOperandType;
+  MCContext &Ctx;              // Context the expressions are allocated in.
+  Mangler *Mang;               // Supplies the symbols of global values.
+  MipsAsmPrinter &AsmPrinter;  // Supplies block/external/JT/CP symbols.
+public:
+  MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
+                  MipsAsmPrinter &asmprinter);  
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+private:
+  MCOperand LowerSymbolOperand(const MachineOperand &MO,
+                               MachineOperandType MOTy) const;
+};
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
new file mode 100644
index 000000000000..9a2bdae0e339
--- /dev/null
+++ b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -0,0 +1,63 @@
+//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mipsmcsymbolrefexpr"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+const MipsMCSymbolRefExpr*
+MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
+                            int Offset, MCContext &Ctx) {
+  return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
+}
+
+/// PrintImpl - Print the symbol plus any non-zero signed offset, wrapped in
+/// the relocation operator chosen by Kind, e.g. "%hi(sym+4)" or plain "sym".
+void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+  const char *Opener = "";
+  switch (Kind) {
+  default: assert(0 && "Invalid kind!");
+  case VK_Mips_None:     break;
+  case VK_Mips_GPREL:    Opener = "%gp_rel(";   break;
+  case VK_Mips_GOT_CALL: Opener = "%call16(";   break;
+  case VK_Mips_GOT:      Opener = "%got(";      break;
+  case VK_Mips_ABS_HI:   Opener = "%hi(";       break;
+  case VK_Mips_ABS_LO:   Opener = "%lo(";       break;
+  case VK_Mips_TLSGD:    Opener = "%tlsgd(";    break;
+  case VK_Mips_GOTTPREL: Opener = "%gottprel("; break;
+  case VK_Mips_TPREL_HI: Opener = "%tprel_hi("; break;
+  case VK_Mips_TPREL_LO: Opener = "%tprel_lo("; break;
+  }
+
+  OS << Opener << *Symbol;
+  if (Offset > 0)
+    OS << '+';
+  if (Offset)
+    OS << Offset;
+  if (Kind != VK_Mips_None)
+    OS << ')';
+}
+
+bool
+MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+                                              const MCAsmLayout *Layout) const {
+  return false;
+}
+
+void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
+  Asm->getOrCreateSymbolData(*Symbol);
+}
+
+const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
+  return Symbol->isDefined() ? &Symbol->getSection() : NULL;
+}
+  
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h
new file mode 100644
index 000000000000..3e695963709e
--- /dev/null
+++ b/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -0,0 +1,62 @@
+//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCSYMBOLREFEXPR_H
+#define MIPSMCSYMBOLREFEXPR_H
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class MipsMCSymbolRefExpr : public MCTargetExpr {
+public:
+  enum VariantKind {
+    VK_Mips_None,      // printed as the bare symbol, no operator
+    VK_Mips_GPREL,     // printed as %gp_rel(sym)
+    VK_Mips_GOT_CALL,  // printed as %call16(sym)
+    VK_Mips_GOT,       // printed as %got(sym)
+    VK_Mips_ABS_HI,    // printed as %hi(sym)
+    VK_Mips_ABS_LO,    // printed as %lo(sym)
+    VK_Mips_TLSGD,     // printed as %tlsgd(sym)
+    VK_Mips_GOTTPREL,  // printed as %gottprel(sym)
+    VK_Mips_TPREL_HI,  // printed as %tprel_hi(sym)
+    VK_Mips_TPREL_LO   // printed as %tprel_lo(sym)
+  };
+
+private:
+  const VariantKind Kind;  // Selects the operator used when printing.
+  const MCSymbol *Symbol;  // Symbol this expression refers to.
+  int Offset;              // Signed addend printed after the symbol if != 0.
+
+  explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
+                               int _Offset)
+    : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
+  
+public:
+  static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
+                                           const MCSymbol *Symbol, int Offset,
+                                           MCContext &Ctx);
+
+  void PrintImpl(raw_ostream &OS) const;
+  bool EvaluateAsRelocatableImpl(MCValue &Res,
+                                 const MCAsmLayout *Layout) const;
+  void AddValueSymbols(MCAssembler *) const;
+  const MCSection *FindAssociatedSection() const;
+  // Every MCExpr whose kind is Target is accepted as a MipsMCSymbolRefExpr.
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Target;
+  }
+
+  static bool classof(const MipsMCSymbolRefExpr *) { return true; }
+
+  int getOffset() const { return Offset; }
+  void setOffset(int O) { Offset = O; }
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index df40e6c748a6..dbb7a6744224 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -27,6 +27,7 @@ namespace llvm {
 class MipsFunctionInfo : public MachineFunctionInfo {
 
 private:
+  MachineFunction& MF;
   /// SRetReturnReg - Some subtargets require that sret lowering includes
   /// returning the value of the returned struct in a register. This field
   /// holds the virtual register into which the sret argument is passed.
@@ -47,6 +48,7 @@ private:
   //                LowerCall except for the frame object for restoring $gp. 
   std::pair<int, int> InArgFIRange, OutArgFIRange;
   int GPFI; // Index of the frame object for restoring $gp 
+  mutable int DynAllocFI; // Frame index of dynamically allocated stack area.   
   unsigned MaxCallFrameSize;
 
   /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
@@ -55,10 +57,10 @@ private:
   int AtomicFrameIndex;
 public:
   MipsFunctionInfo(MachineFunction& MF)
-  : SRetReturnReg(0), GlobalBaseReg(0),
+  : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
     VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
-    OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0),
-    AtomicFrameIndex(-1)
+    OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+    MaxCallFrameSize(0), AtomicFrameIndex(-1)
   {}
 
   bool isInArgFI(int FI) const {
@@ -81,6 +83,16 @@ public:
   bool needGPSaveRestore() const { return getGPFI(); }
   bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
 
+  // Returns the frame index of the dynamically allocated stack area. The
+  // frame object is created on the first call (DynAllocFI is 0 until then).
+  int getDynAllocFI() const {
+    if (!DynAllocFI)
+      DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+    return DynAllocFI;
+  }
+  bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
   unsigned getSRetReturnReg() const { return SRetReturnReg; }
   void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
 
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index b0984afbebed..24390daff75c 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -35,13 +35,16 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MipsGenRegisterInfo.inc"
 
 using namespace llvm;
 
 MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
                                    const TargetInstrInfo &tii)
-  : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
-    Subtarget(ST), TII(tii) {}
+  : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {}
 
 /// getRegisterNumbering - Given the enum value for some register, e.g.
 /// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -176,28 +179,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                << "spOffset   : " << spOffset << "\n"
                << "stackSize  : " << stackSize << "\n");
 
-  int Offset;
-
-  // Calculate final offset.
-  // - There is no need to change the offset if the frame object is an outgoing
-  //   argument or a $gp restore location,
-  // - If the frame object is any of the following, its offset must be adjusted
-  //   by adding the size of the stack:
-  //   incoming argument, callee-saved register location or local variable.  
-  if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex))
-    Offset = spOffset;
-  else
-    Offset = spOffset + stackSize;
-
-  Offset    += MI.getOperand(i-1).getImm();
-
-  DEBUG(errs() << "Offset     : " << Offset << "\n" << "<--------->\n");
-
-  unsigned NewReg = 0;
-  int NewImm = 0;
-  MachineBasicBlock &MBB = *MI.getParent();
-  bool ATUsed;
-  unsigned FrameReg;
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   int MinCSFI = 0;
   int MaxCSFI = -1;
@@ -213,42 +194,54 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
   //  3. Locations for callee-saved registers.
   // Everything else is referenced relative to whatever register 
   // getFrameRegister() returns.
-  if (MipsFI->isOutArgFI(FrameIndex) ||
+  unsigned FrameReg;
+
+  if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
       (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
     FrameReg = Mips::SP;
   else
     FrameReg = getFrameRegister(MF); 
   
-  // Offset fits in the 16-bit field
-  if (Offset < 0x8000 && Offset >= -0x8000) {
-    NewReg = FrameReg;
-    NewImm = Offset;
-    ATUsed = false;
-  }
-  else {
-    const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  // Calculate final offset.
+  // - There is no need to change the offset if the frame object is one of the
+  //   following: an outgoing argument, pointer to a dynamically allocated
+  //   stack space or a $gp restore location,
+  // - If the frame object is any of the following, its offset must be adjusted
+  //   by adding the size of the stack:
+  //   incoming argument, callee-saved register location or local variable.  
+  int Offset;
+
+  if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+      MipsFI->isDynAllocFI(FrameIndex))
+    Offset = spOffset;
+  else
+    Offset = spOffset + stackSize;
+
+  Offset    += MI.getOperand(i+1).getImm();
+
+  DEBUG(errs() << "Offset     : " << Offset << "\n" << "<--------->\n");
+
+  // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+  // field. 
+  if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+    MachineBasicBlock &MBB = *MI.getParent();
     DebugLoc DL = II->getDebugLoc();
-    int ImmLo = (short)(Offset & 0xffff);
     int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
                 ((Offset & 0x8000) != 0);
 
     // FIXME: change this when mips goes MC".
-    BuildMI(MBB, II, DL, TII->get(Mips::NOAT));
-    BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
-    BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(FrameReg)
-                                                        .addReg(Mips::AT);
-    NewReg = Mips::AT;
-    NewImm = ImmLo;
-    
-    ATUsed = true;
-  }
+    BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
+    BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
+    BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
+                                                       .addReg(Mips::AT);
+    FrameReg = Mips::AT;
+    Offset = (short)(Offset & 0xffff);
 
-  // FIXME: change this when mips goes MC".
-  if (ATUsed)
     BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+  }
 
-  MI.getOperand(i).ChangeToRegister(NewReg, false);
-  MI.getOperand(i-1).ChangeToImmediate(NewImm);
+  MI.getOperand(i).ChangeToRegister(FrameReg, false);
+  MI.getOperand(i+1).ChangeToImmediate(Offset);
 }
 
 unsigned MipsRegisterInfo::
@@ -283,5 +276,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
 int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
   return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
 }
-
-#include "MipsGenRegisterInfo.inc"
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 76b0035f1696..646369b5966f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -16,7 +16,9 @@
 
 #include "Mips.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "MipsGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "MipsGenRegisterInfo.inc"
 
 namespace llvm {
 class MipsSubtarget;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index e97d4505eb43..f0db518b754b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -157,15 +157,15 @@ let Namespace = "Mips" in {
 // Register Classes
 //===----------------------------------------------------------------------===//
 
-def CPURegs : RegisterClass<"Mips", [i32], 32,
+def CPURegs : RegisterClass<"Mips", [i32], 32, (add
   // Return Values and Arguments
-  [V0, V1, A0, A1, A2, A3,
+  V0, V1, A0, A1, A2, A3,
   // Not preserved across procedure calls
   T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
   // Callee save
   S0, S1, S2, S3, S4, S5, S6, S7,
   // Reserved
-  ZERO, AT, K0, K1, GP, SP, FP, RA]>;
+  ZERO, AT, K0, K1, GP, SP, FP, RA)>;
 
 // 64bit fp:
 // * FGR64  - 32 64-bit registers
@@ -174,33 +174,25 @@ def CPURegs : RegisterClass<"Mips", [i32], 32,
 // 32bit fp:
 // * FGR32 - 16 32-bit even registers
 // * FGR32 - 32 32-bit registers (single float only mode)
-def FGR32 : RegisterClass<"Mips", [f32], 32,
-  // Return Values and Arguments
-  [F0, F1, F2, F3, F12, F13, F14, F15,
-  // Not preserved across procedure calls
-  F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
-  // Callee save
-  F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
-  // Reserved
-  F31]>;
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
 
-def AFGR64 : RegisterClass<"Mips", [f64], 64,
+def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
   // Return Values and Arguments
-  [D0, D1, D6, D7,
+  D0, D1, D6, D7,
   // Not preserved across procedure calls
   D2, D3, D4, D5, D8, D9,
   // Callee save
   D10, D11, D12, D13, D14,
   // Reserved
-  D15]> {
+  D15)> {
   let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
 }
 
 // Condition Register for floating point operations
-def CCR  : RegisterClass<"Mips", [i32], 32, [FCR31]>;
+def CCR  : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
 
 // Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, [HI, LO]>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
 
 // Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, [HWR29]>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 70747f5da137..6eee3333d584 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -7,27 +7,38 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the Mips specific subclass of TargetSubtarget.
+// This file implements the Mips specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "MipsSubtarget.h"
 #include "Mips.h"
-#include "MipsGenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MipsGenSubtargetInfo.inc"
+
 using namespace llvm;
 
-MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
-                             bool little) :
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+                             const std::string &FS, bool little) :
+  MipsGenSubtargetInfo(TT, CPU, FS),
   MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
   IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
   HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
   HasSwap(false), HasBitCount(false)
 {
-  std::string CPU = "mips1";
+  std::string CPUName = CPU;
+  if (CPUName.empty())
+    CPUName = "mips1";
   MipsArchVersion = Mips1;
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUName);
 
   // Is the target system Linux ?
   if (TT.find("linux") == std::string::npos)
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 096bbed7b047..533d4afe073e 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -7,21 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the Mips specific subclass of TargetSubtarget.
+// This file declares the Mips specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef MIPSSUBTARGET_H
 #define MIPSSUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "MipsGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-class MipsSubtarget : public TargetSubtarget {
+class MipsSubtarget : public MipsGenSubtargetInfo {
 
 public:
   enum MipsABIEnum {
@@ -92,12 +95,12 @@ public:
 
   /// This constructor initializes the data members to match that
   /// of the specified triple.
-  MipsSubtarget(const std::string &TT, const std::string &FS, bool little);
+  MipsSubtarget(const std::string &TT, const std::string &CPU,
+                const std::string &FS, bool little);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool isMips1() const { return MipsArchVersion == Mips1; }
   bool isMips32() const { return MipsArchVersion >= Mips32; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index cfbb92c9ac16..20b9f4ea3853 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Mips.h"
-#include "MipsMCAsmInfo.h"
 #include "MipsTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -22,8 +21,6 @@ extern "C" void LLVMInitializeMipsTarget() {
   // Register the target.
   RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
   RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
-  RegisterAsmInfo<MipsMCAsmInfo> A(TheMipsTarget);
-  RegisterAsmInfo<MipsMCAsmInfo> B(TheMipselTarget);
 }
 
 // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -34,10 +31,11 @@ extern "C" void LLVMInitializeMipsTarget() {
 // an easier handling.
 // Using CodeModel::Large enables different CALL behavior.
 MipsTargetMachine::
-MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
+MipsTargetMachine(const Target &T, const std::string &TT,
+                  const std::string &CPU, const std::string &FS,
                   bool isLittle=false):
-  LLVMTargetMachine(T, TT),
-  Subtarget(TT, FS, isLittle),
+  LLVMTargetMachine(T, TT, CPU, FS),
+  Subtarget(TT, CPU, FS, isLittle),
   DataLayout(isLittle ? 
              std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
              std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
@@ -55,8 +53,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
 
 MipselTargetMachine::
 MipselTargetMachine(const Target &T, const std::string &TT,
-                    const std::string &FS) :
-  MipsTargetMachine(T, TT, FS, true) {}
+                    const std::string &CPU, const std::string &FS) :
+  MipsTargetMachine(T, TT, CPU, FS, true) {}
 
 // Install an instruction selector pass using
 // the ISelDag to gen Mips code.
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 102dd8566dde..a021af2ff16d 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -35,7 +35,8 @@ namespace llvm {
     MipsSelectionDAGInfo TSInfo;
   public:
     MipsTargetMachine(const Target &T, const std::string &TT,
-                      const std::string &FS, bool isLittle);
+                      const std::string &CPU, const std::string &FS,
+                      bool isLittle);
 
     virtual const MipsInstrInfo   *getInstrInfo()     const
     { return &InstrInfo; }
@@ -73,7 +74,7 @@ namespace llvm {
 class MipselTargetMachine : public MipsTargetMachine {
 public:
   MipselTargetMachine(const Target &T, const std::string &TT,
-                      const std::string &FS);
+                      const std::string &CPU, const std::string &FS);
 };
 
 } // End llvm namespace
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 331266da30b3..ce08916aaac1 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -1,13 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS PTX.td)
 
 tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(PTXGenCallingConv.inc -gen-callingconv)
 tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
-tablegen(PTXGenInstrNames.inc -gen-instr-enums)
-tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
-tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PTXGenRegisterNames.inc -gen-register-enums)
-tablegen(PTXGenSubtarget.inc -gen-subtarget)
+tablegen(PTXGenInstrInfo.inc -gen-instr-info)
+tablegen(PTXGenRegisterInfo.inc -gen-register-info)
+tablegen(PTXGenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(PTXCodeGen
   PTXAsmPrinter.cpp
@@ -15,7 +13,6 @@ add_llvm_target(PTXCodeGen
   PTXISelLowering.cpp
   PTXInstrInfo.cpp
   PTXFrameLowering.cpp
-  PTXMCAsmInfo.cpp
   PTXMCAsmStreamer.cpp
   PTXMFInfoExtract.cpp
   PTXRegisterInfo.cpp
@@ -24,3 +21,4 @@ add_llvm_target(PTXCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..df0f63fdba60
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPTXDesc
+  PTXMCTargetDesc.cpp
+  PTXMCAsmInfo.cpp
+  )
diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/PTX/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..35f5a7b2e6ad
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PTX/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
index b670abdbe095..efefead5341d 100644
--- a/lib/Target/PTX/PTXMCAsmInfo.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -12,10 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "PTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
 
 using namespace llvm;
 
 PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+  Triple TheTriple(TT);
+  if (TheTriple.getArch() == Triple::ptx64)
+    PointerSize = 8;
+
   CommentString = "//";
 
   PrivateGlobalPrefix = "$L__";
diff --git a/lib/Target/PTX/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
index 03f5d66b3d60..03f5d66b3d60 100644
--- a/lib/Target/PTX/PTXMCAsmInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 000000000000..23f70bd13787
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCTargetDesc.h"
+#include "PTXMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPTXMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitPTXMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializePTXMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+  TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+}
+
+static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                 StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitPTXMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
+                                          createPTXMCSubtargetInfo);
+  TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
+                                          createPTXMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializePTXMCAsmInfo() {
+  RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+  RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+}
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
new file mode 100644
index 000000000000..1003b0b5ece9
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXMCTARGETDESC_H
+#define PTXMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePTX32Target;
+extern Target ThePTX64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "PTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile
index 2c40d6994094..93dd38aca7ec 100644
--- a/lib/Target/PTX/Makefile
+++ b/lib/Target/PTX/Makefile
@@ -13,14 +13,12 @@ TARGET = PTX
 
 # Make sure that tblgen is run, first thing.
 BUILT_SOURCES = PTXGenAsmWriter.inc \
+		PTXGenCallingConv.inc \
 		PTXGenDAGISel.inc \
 		PTXGenInstrInfo.inc \
-		PTXGenInstrNames.inc \
 		PTXGenRegisterInfo.inc \
-		PTXGenRegisterInfo.h.inc \
-		PTXGenRegisterNames.inc \
-		PTXGenSubtarget.inc
+		PTXGenSubtargetInfo.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index ec2be9291a04..28cab2429c81 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -15,6 +15,7 @@
 #ifndef PTX_H
 #define PTX_H
 
+#include "MCTargetDesc/PTXMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -42,14 +43,6 @@ namespace llvm {
   FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel);
 
-  extern Target ThePTX32Target;
-  extern Target ThePTX64Target;
 } // namespace llvm;
 
-// Defines symbolic names for PTX registers.
-#include "PTXGenRegisterNames.inc"
-
-// Defines symbolic names for the PTX instructions.
-#include "PTXGenInstrNames.inc"
-
 #endif // PTX_H
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 231866a08953..f6fbe9fffc6f 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -16,7 +16,7 @@
 include "llvm/Target/Target.td"
 
 //===----------------------------------------------------------------------===//
-// Subtarget Features.
+// Subtarget Features
 //===----------------------------------------------------------------------===//
 
 //===- Architectural Features ---------------------------------------------===//
@@ -30,34 +30,54 @@ def FeatureNoFMA  : SubtargetFeature<"no-fma","SupportsFMA", "false",
 //===- PTX Version --------------------------------------------------------===//
 
 def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
-                                    "Use PTX Language Version 2.0",
-                                    []>;
+                                    "Use PTX Language Version 2.0">;
 
 def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
-                                    "Use PTX Language Version 2.1",
-                                    [FeaturePTX20]>;
+                                    "Use PTX Language Version 2.1">;
 
 def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
-                                    "Use PTX Language Version 2.2",
-                                    [FeaturePTX21]>;
+                                    "Use PTX Language Version 2.2">;
 
 def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
-                                    "Use PTX Language Version 2.3",
-                                    [FeaturePTX22]>;
-
-//===- PTX Shader Model ---------------------------------------------------===//
-
-def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
-                                   "Enable Shader Model 1.0 compliance">;
-def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
-                                   "Enable Shader Model 1.3 compliance",
-                                   [FeatureSM10, FeatureDouble]>;
-def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
-                                   "Enable Shader Model 2.0 compliance",
-                                   [FeatureSM13]>;
+                                    "Use PTX Language Version 2.3">;
+
+//===- PTX Target ---------------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
+                                   "Use Shader Model 1.0">;
+def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
+                                   "Use Shader Model 1.1">;
+def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
+                                   "Use Shader Model 1.2">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
+                                   "Use Shader Model 1.3">;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
+                                   "Use Shader Model 2.0">;
+def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
+                                   "Use Shader Model 2.1">;
+def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
+                                   "Use Shader Model 2.2">;
+def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
+                                   "Use Shader Model 2.3">;
+
+def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
+                                        "PTX_COMPUTE_1_0",
+                                        "Use Compute Compatibility 1.0">;
+def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
+                                        "PTX_COMPUTE_1_1",
+                                        "Use Compute Compatibility 1.1">;
+def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
+                                        "PTX_COMPUTE_1_2",
+                                        "Use Compute Compatibility 1.2">;
+def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
+                                        "PTX_COMPUTE_1_3",
+                                        "Use Compute Compatibility 1.3">;
+def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
+                                        "PTX_COMPUTE_2_0",
+                                        "Use Compute Compatibility 2.0">;
 
 //===----------------------------------------------------------------------===//
-// PTX supported processors.
+// PTX supported processors
 //===----------------------------------------------------------------------===//
 
 class Proc<string Name, list<SubtargetFeature> Features>
@@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features>
 
 def : Proc<"generic", []>;
 
+// Processor definitions for compute/shader models
+def : Proc<"compute_10", [FeatureCOMPUTE10]>;
+def : Proc<"compute_11", [FeatureCOMPUTE11]>;
+def : Proc<"compute_12", [FeatureCOMPUTE12]>;
+def : Proc<"compute_13", [FeatureCOMPUTE13]>;
+def : Proc<"compute_20", [FeatureCOMPUTE20]>;
+def : Proc<"sm_10",      [FeatureSM10]>;
+def : Proc<"sm_11",      [FeatureSM11]>;
+def : Proc<"sm_12",      [FeatureSM12]>;
+def : Proc<"sm_13",      [FeatureSM13]>;
+def : Proc<"sm_20",      [FeatureSM20]>;
+def : Proc<"sm_21",      [FeatureSM21]>;
+def : Proc<"sm_22",      [FeatureSM22]>;
+def : Proc<"sm_23",      [FeatureSM23]>;
+
+// Processor definitions for common GPU architectures
+def : Proc<"g80",        [FeatureSM10]>;
+def : Proc<"gt200",      [FeatureSM13]>;
+def : Proc<"gf100",      [FeatureSM20, FeatureDouble]>;
+def : Proc<"fermi",      [FeatureSM20, FeatureDouble]>;
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
@@ -72,6 +113,12 @@ def : Proc<"generic", []>;
 include "PTXRegisterInfo.td"
 
 //===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PTXCallingConv.td"
+
+//===----------------------------------------------------------------------===//
 // Instruction Descriptions
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 29c4781de654..2848d5460eee 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -22,9 +22,12 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
@@ -34,6 +37,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -62,8 +66,13 @@ public:
                        const char *Modifier = 0);
   void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
                          const char *Modifier = 0);
+  void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+                          const char *Modifier = 0); 
   void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
 
+  unsigned GetOrCreateSourceID(StringRef FileName,
+                               StringRef DirName);
+
   // autogen'd.
   void printInstruction(const MachineInstr *MI, raw_ostream &OS);
   static const char *getRegisterName(unsigned RegNo);
@@ -71,20 +80,23 @@ public:
 private:
   void EmitVariableDeclaration(const GlobalVariable *gv);
   void EmitFunctionDeclaration();
+
+  StringMap<unsigned> SourceIdMap;
 }; // class PTXAsmPrinter
 } // namespace
 
 static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
 
 static const char *getRegisterTypeName(unsigned RegNo) {
 #define TEST_REGCLS(cls, clsstr)                \
   if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
-  TEST_REGCLS(Preds, pred);
-  TEST_REGCLS(RRegu16, u16);
-  TEST_REGCLS(RRegu32, u32);
-  TEST_REGCLS(RRegu64, u64);
-  TEST_REGCLS(RRegf32, f32);
-  TEST_REGCLS(RRegf64, f64);
+  TEST_REGCLS(RegPred, pred);
+  TEST_REGCLS(RegI16, b16);
+  TEST_REGCLS(RegI32, b32);
+  TEST_REGCLS(RegI64, b64);
+  TEST_REGCLS(RegF32, b32);
+  TEST_REGCLS(RegF64, b64);
 #undef TEST_REGCLS
 
   llvm_unreachable("Not in any register class!");
@@ -162,6 +174,27 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
   OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
                                 (ST.supportsDouble() ? ""
                                                      : ", map_f64_to_f32")));
+  // .address_size directive is optional, but it must immediately follow
+  // the .target directive if present within a module
+  if (ST.supportsPTX23()) {
+    std::string addrSize = ST.is64Bit() ? "64" : "32";
+    OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+  }
+
+  OutStreamer.AddBlankLine();
+
+  // Define any .file directives
+  DebugInfoFinder DbgFinder;
+  DbgFinder.processModule(M);
+
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I) {
+    DICompileUnit DIUnit(*I);
+    StringRef FN = DIUnit.getFilename();
+    StringRef Dir = DIUnit.getDirectory();
+    GetOrCreateSourceID(FN, Dir);
+  }
+
   OutStreamer.AddBlankLine();
 
   // declare global variables
@@ -194,6 +227,21 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
     def += ';';
     OutStreamer.EmitRawText(Twine(def));
   }
+
+  const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+  DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+               << " frame object(s)\n");
+  for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+    DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+    if (FrameInfo->getObjectSize(i) > 0) {
+      std::string def = "\t.reg .b";
+      def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+      def += " s";
+      def += utostr(i);
+      def += ";";
+      OutStreamer.EmitRawText(Twine(def));
+    }
+  }
 }
 
 void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -202,6 +250,54 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 
   raw_string_ostream OS(str);
 
+  DebugLoc DL = MI->getDebugLoc();
+  if (!DL.isUnknown()) {
+
+    const MDNode *S = DL.getScope(MF->getFunction()->getContext());
+
+    // This is taken from DwarfDebug.cpp, which is not a public LLVM class and
+    // therefore cannot be reused directly.
+    StringRef Fn;
+    StringRef Dir;
+    unsigned Src = 1;
+    if (S) {
+      DIDescriptor Scope(S);
+      if (Scope.isCompileUnit()) {
+        DICompileUnit CU(S);
+        Fn = CU.getFilename();
+        Dir = CU.getDirectory();
+      } else if (Scope.isFile()) {
+        DIFile F(S);
+        Fn = F.getFilename();
+        Dir = F.getDirectory();
+      } else if (Scope.isSubprogram()) {
+        DISubprogram SP(S);
+        Fn = SP.getFilename();
+        Dir = SP.getDirectory();
+      } else if (Scope.isLexicalBlock()) {
+        DILexicalBlock DB(S);
+        Fn = DB.getFilename();
+        Dir = DB.getDirectory();
+      } else
+        assert(0 && "Unexpected scope info");
+
+      Src = GetOrCreateSourceID(Fn, Dir);
+    }
+    OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
+                                     0, 0, 0, Fn);
+
+    const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
+
+    OS << "\t.loc ";
+    OS << utostr(MDL.getFileNum());
+    OS << " ";
+    OS << utostr(MDL.getLine());
+    OS << " ";
+    OS << utostr(MDL.getColumn());
+    OS << "\n";
+  }
+
+
   // Emit predicate
   printPredicateOperand(MI, OS);
 
@@ -275,6 +371,11 @@ void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
   OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
 }
 
+void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
+                                       raw_ostream &OS, const char *Modifier) {
+  OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
 void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
   // Check to see if this is a special global used by LLVM, if so, emit it.
   if (EmitSpecialLLVMGlobal(gv))
@@ -311,7 +412,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
     decl += ".b8 ";
     decl += gvsym->getName();
     decl += "[";
-    
+
     if (elementTy->isArrayTy())
     {
       assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
@@ -320,7 +421,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
       elementTy = arrayTy->getElementType();
 
       unsigned numElements = arrayTy->getNumElements();
-      
+
       while (elementTy->isArrayTy()) {
 
         arrayTy = dyn_cast<const ArrayType>(elementTy);
@@ -336,17 +437,17 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
       // Compute the size of the array, in bytes.
       uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
                         * numElements;
-  
+
       decl += utostr(arraySize);
     }
-    
+
     decl += "]";
-    
+
     // handle string constants (assume ConstantArray means string)
-    
+
     if (gv->hasInitializer())
     {
-      Constant *C = gv->getInitializer();  
+      const Constant *C = gv->getInitializer();  
       if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
       {
         decl += " = {";
@@ -354,10 +455,11 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
         for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
         {
           if (i > 0)   decl += ",";
-      
-          decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+          decl += "0x" +
+                utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
         }
-      
+
         decl += "}";
       }
     }
@@ -393,17 +495,25 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
 
   const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
   const bool isKernel = MFI->isKernel();
-  unsigned reg;
+  const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
 
   std::string decl = isKernel ? ".entry" : ".func";
 
-  // Print return register
-  reg = MFI->retReg();
-  if (!isKernel && reg != PTX::NoRegister) {
-    decl += " (.reg ."; // FIXME: could it return in .param space?
-    decl += getRegisterTypeName(reg);
-    decl += " ";
-    decl += getRegisterName(reg);
+  unsigned cnt = 0;
+
+  if (!isKernel) {
+    decl += " (";
+    for (PTXMachineFunctionInfo::ret_iterator
+         i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
+         i != e; ++i) {
+      if (i != b) {
+        decl += ", ";
+      }
+      decl += ".reg .";
+      decl += getRegisterTypeName(*i);
+      decl += " ";
+      decl += getRegisterName(*i);
+    }
     decl += ")";
   }
 
@@ -411,40 +521,31 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
   decl += " ";
   decl += CurrentFnSym->getName().str();
 
-  // Print parameter list
-  if (!MFI->argRegEmpty()) {
-    decl += " (";
-    if (isKernel) {
-      unsigned cnt = 0;
-      for(PTXMachineFunctionInfo::reg_iterator
-          i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
-          i != e; ++i) {
-        reg = *i;
-        assert(reg != PTX::NoRegister && "Not a valid register!");
-        if (i != b)
-          decl += ", ";
-        decl += ".param .";
-        decl += getRegisterTypeName(reg);
-        decl += " ";
-        decl += PARAM_PREFIX;
-        decl += utostr(++cnt);
-      }
+  decl += " (";
+
+  cnt = 0;
+
+  // Print parameters
+  for (PTXMachineFunctionInfo::reg_iterator
+       i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+       i != e; ++i) {
+    if (i != b) {
+      decl += ", ";
+    }
+    if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+      decl += ".param .b";
+      decl += utostr(*i);
+      decl += " ";
+      decl += PARAM_PREFIX;
+      decl += utostr(++cnt);
     } else {
-      for (PTXMachineFunctionInfo::reg_iterator
-           i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
-           i != e; ++i) {
-        reg = *i;
-        assert(reg != PTX::NoRegister && "Not a valid register!");
-        if (i != b)
-          decl += ", ";
-        decl += ".reg .";
-        decl += getRegisterTypeName(reg);
-        decl += " ";
-        decl += getRegisterName(reg);
-      }
+      decl += ".reg .";
+      decl += getRegisterTypeName(*i);
+      decl += " ";
+      decl += getRegisterName(*i);
     }
-    decl += ")";
   }
+  decl += ")";
 
   OutStreamer.EmitRawText(Twine(decl));
 }
@@ -468,6 +569,33 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
   }
 }
 
+unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
+                                            StringRef DirName) {
+  // If the front end did not provide a file name, then assume stdin.
+  if (FileName.empty())
+    return GetOrCreateSourceID("<stdin>", StringRef());
+
+  // MCStreamer expects the full path name as the filename.
+  if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+    SmallString<128> FullPathName = DirName;
+    sys::path::append(FullPathName, FileName);
+    // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+    return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+  }
+
+  StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+  if (Entry.getValue())
+    return Entry.getValue();
+
+  unsigned SrcId = SourceIdMap.size();
+  Entry.setValue(SrcId);
+
+  // Print out a .file directive to specify files for .loc directives.
+  OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+  return SrcId;
+}
+
 #include "PTXGenAsmWriter.inc"
 
 // Force static initialization.
diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td
new file mode 100644
index 000000000000..3e3ff4896621
--- /dev/null
+++ b/lib/Target/PTX/PTXCallingConv.td
@@ -0,0 +1,29 @@
+
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+  CCIfType<[i1],      CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
+  CCIfType<[i16],     CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
+  CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
+  CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+  CCIfType<[i1],      CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
+  CCIfType<[i16],     CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
+  CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
+  CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
+]>;
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index b3c85da7b446..9adfa624b29e 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "PTXTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -41,8 +42,6 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
 #include "PTXGenDAGISel.inc"
 
   private:
-    SDNode *SelectREAD_PARAM(SDNode *Node);
-
     // We need this only because we can't match intruction BRAdp
     // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
     SDNode *SelectBRCOND(SDNode *Node);
@@ -67,8 +66,6 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
 
 SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
   switch (Node->getOpcode()) {
-    case PTXISD::READ_PARAM:
-      return SelectREAD_PARAM(Node);
     case ISD::BRCOND:
       return SelectBRCOND(Node);
     default:
@@ -76,37 +73,6 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
   }
 }
 
-SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
-  SDValue  index = Node->getOperand(1);
-  DebugLoc dl    = Node->getDebugLoc();
-  unsigned opcode;
-
-  if (index.getOpcode() != ISD::TargetConstant)
-    llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
-
-  if (Node->getValueType(0) == MVT::i16) {
-    opcode = PTX::LDpiU16;
-  }
-  else if (Node->getValueType(0) == MVT::i32) {
-    opcode = PTX::LDpiU32;
-  }
-  else if (Node->getValueType(0) == MVT::i64) {
-    opcode = PTX::LDpiU64;
-  }
-  else if (Node->getValueType(0) == MVT::f32) {
-    opcode = PTX::LDpiF32;
-  }
-  else if (Node->getValueType(0) == MVT::f64) {
-    opcode = PTX::LDpiF64;
-  }
-  else {
-    llvm_unreachable("Unknown parameter type for ld.param");
-  }
-
-  return PTXInstrInfo::
-    GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
-}
-
 SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
   assert(Node->getNumOperands() >= 3);
 
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e9b1d8c3bbef..6fcf710e3f1f 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -15,7 +15,9 @@
 #include "PTXISelLowering.h"
 #include "PTXMachineFunctionInfo.h"
 #include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -24,49 +26,80 @@
 
 using namespace llvm;
 
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "PTXGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
 PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
   // Set up the register classes.
-  addRegisterClass(MVT::i1,  PTX::PredsRegisterClass);
-  addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
-  addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
-  addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
-  addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
-  addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+  addRegisterClass(MVT::i1,  PTX::RegPredRegisterClass);
+  addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+  addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+  addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+  addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+  addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
 
   setBooleanContents(ZeroOrOneBooleanContent);
+  setMinFunctionAlignment(2);
   
-  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
-
-  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
-  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  ////////////////////////////////////
+  /////////// Expansion //////////////
+  ////////////////////////////////////
+  
+  // (any/zero/sign) extload => load + (any/zero/sign) extend
   
-  // Turn i16 (z)extload into load + (z)extend
   setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
-
-  // Turn f32 extload into load + fextend
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
   
-  // Turn f64 truncstore into trunc + store.
-  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  // f32 extload => load + fextend
+  
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);  
+  
+  // f64 truncstore => trunc + store
+  
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand); 
+  
+  // sign_extend_inreg => sign_extend
+  
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  
+  // br_cc => brcond
   
-  // Customize translation of memory addresses
-  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
-
-  // Expand BR_CC into BRCOND
   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
 
-  // Expand SELECT_CC into SETCC
+  // select_cc => setcc
+  
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
   setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
   
-  // need to lower SETCC of Preds into bitwise logic
+  ////////////////////////////////////
+  //////////// Legal /////////////////
+  ////////////////////////////////////
+  
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  
+  ////////////////////////////////////
+  //////////// Custom ////////////////
+  ////////////////////////////////////
+  
+  // customise setcc to use bitwise logic if possible
+  
   setOperationAction(ISD::SETCC, MVT::i1, Custom);
 
-  setMinFunctionAlignment(2);
+  // customize translation of memory addresses
+  
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 
   // Compute derived properties from the register classes
   computeRegisterProperties();
@@ -93,8 +126,10 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
       llvm_unreachable("Unknown opcode");
     case PTXISD::COPY_ADDRESS:
       return "PTXISD::COPY_ADDRESS";
-    case PTXISD::READ_PARAM:
-      return "PTXISD::READ_PARAM";
+    case PTXISD::LOAD_PARAM:
+      return "PTXISD::LOAD_PARAM";
+    case PTXISD::STORE_PARAM:
+      return "PTXISD::STORE_PARAM";
     case PTXISD::EXIT:
       return "PTXISD::EXIT";
     case PTXISD::RET:
@@ -113,18 +148,18 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Op2 = Op.getOperand(2);
   DebugLoc dl = Op.getDebugLoc();
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-  
+
   // Look for X == 0, X == 1, X != 0, or X != 1  
   // We can simplify these to bitwise logic
-  
+
   if (Op1.getOpcode() == ISD::Constant &&
       (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
        cast<ConstantSDNode>(Op1)->isNullValue()) &&
       (CC == ISD::SETEQ || CC == ISD::SETNE)) {
 
-	  return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+    return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
   }
-  
+
   return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
 }
 
@@ -149,27 +184,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
-namespace {
-struct argmap_entry {
-  MVT::SimpleValueType VT;
-  TargetRegisterClass *RC;
-  TargetRegisterClass::iterator loc;
-
-  argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC)
-    : VT(_VT), RC(_RC), loc(_RC->begin()) {}
-
-  void reset() { loc = RC->begin(); }
-  bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
-} argmap[] = {
-  argmap_entry(MVT::i1,  PTX::PredsRegisterClass),
-  argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
-  argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
-  argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
-  argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
-  argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
-};
-}                               // end anonymous namespace
-
 SDValue PTXTargetLowering::
   LowerFormalArguments(SDValue Chain,
                        CallingConv::ID CallConv,
@@ -181,6 +195,7 @@ SDValue PTXTargetLowering::
   if (isVarArg) llvm_unreachable("PTX does not support varargs");
 
   MachineFunction &MF = DAG.getMachineFunction();
+  const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
   PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
 
   switch (CallConv) {
@@ -195,44 +210,76 @@ SDValue PTXTargetLowering::
       break;
   }
 
-  // Make sure we don't add argument registers twice
-  if (MFI->isDoneAddArg())
-    llvm_unreachable("cannot add argument registers twice");
-
-  // Reset argmap before allocation
-  for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap);
-       i != e; ++ i)
-    i->reset();
-
-  for (int i = 0, e = Ins.size(); i != e; ++ i) {
-    MVT::SimpleValueType VT = Ins[i].VT.SimpleTy;
-
-    struct argmap_entry *entry = std::find(argmap,
-                                           argmap + array_lengthof(argmap), VT);
-    if (entry == argmap + array_lengthof(argmap))
-      llvm_unreachable("Type of argument is not supported");
-
-    if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass)
-      llvm_unreachable("cannot pass preds to kernel");
-
-    MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
-
-    unsigned preg = *++(entry->loc); // allocate start from register 1
-    unsigned vreg = RegInfo.createVirtualRegister(entry->RC);
-    RegInfo.addLiveIn(preg, vreg);
-
-    MFI->addArgReg(preg);
-
-    SDValue inval;
-    if (MFI->isKernel())
-      inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain,
-                          DAG.getTargetConstant(i, MVT::i32));
-    else
-      inval = DAG.getCopyFromReg(Chain, dl, vreg, VT);
-    InVals.push_back(inval);
+  // We do one of two things here:
+  // IsKernel || SM >= 2.0  ->  Use param space for arguments
+  // SM < 2.0               ->  Use registers for arguments
+  if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+    // We just need to emit the proper LOAD_PARAM ISDs
+    for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+
+      assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+             "Kernels cannot take pred operands");
+
+      SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+                                     DAG.getTargetConstant(i, MVT::i32));
+      InVals.push_back(ArgValue);
+
+      // Instead of storing a physical register in our argument list, we just
+      // store the total size of the parameter, in bits.  The ASM printer
+      // knows how to process this.
+      MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
+    }
+  }
+  else {
+    // For device functions, we use the PTX calling convention to do register
+    // assignments then create CopyFromReg ISDs for the allocated registers
+
+    SmallVector<CCValAssign, 16> ArgLocs;
+    CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
+                   *DAG.getContext());
+
+    CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
+
+    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+      CCValAssign&         VA    = ArgLocs[i];
+      EVT                  RegVT = VA.getLocVT();
+      TargetRegisterClass* TRC   = 0;
+
+      assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+      // Determine which register class we need
+      if (RegVT == MVT::i1) {
+        TRC = PTX::RegPredRegisterClass;
+      }
+      else if (RegVT == MVT::i16) {
+        TRC = PTX::RegI16RegisterClass;
+      }
+      else if (RegVT == MVT::i32) {
+        TRC = PTX::RegI32RegisterClass;
+      }
+      else if (RegVT == MVT::i64) {
+        TRC = PTX::RegI64RegisterClass;
+      }
+      else if (RegVT == MVT::f32) {
+        TRC = PTX::RegF32RegisterClass;
+      }
+      else if (RegVT == MVT::f64) {
+        TRC = PTX::RegF64RegisterClass;
+      }
+      else {
+        llvm_unreachable("Unknown parameter type");
+      }
+
+      unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+      MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+      InVals.push_back(ArgValue);
+
+      MFI->addArgReg(VA.getLocReg());
+    }
   }
-
-  MFI->doneAddArg();
 
   return Chain;
 }
@@ -254,51 +301,47 @@ SDValue PTXTargetLowering::
       assert(Outs.size() == 0 && "Kernel must return void.");
       return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
     case CallingConv::PTX_Device:
-      assert(Outs.size() <= 1 && "Can at most return one value.");
+      //assert(Outs.size() <= 1 && "Can at most return one value.");
       break;
   }
 
-  // PTX_Device
-
-  // return void
-  if (Outs.size() == 0)
-    return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+  MachineFunction& MF = DAG.getMachineFunction();
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
 
   SDValue Flag;
-  unsigned reg;
 
-  if (Outs[0].VT == MVT::i16) {
-    reg = PTX::RH0;
-  }
-  else if (Outs[0].VT == MVT::i32) {
-    reg = PTX::R0;
-  }
-  else if (Outs[0].VT == MVT::i64) {
-    reg = PTX::RD0;
-  }
-  else if (Outs[0].VT == MVT::f32) {
-    reg = PTX::F0;
-  }
-  else {
-    assert(Outs[0].VT == MVT::f64 && "Can return only basic types");
-    reg = PTX::FD0;
-  }
+  // Even though we could use the .param space for return arguments for
+  // device functions if SM >= 2.0 and the number of return arguments is
+  // only 1, we just always use registers since this makes the codegen
+  // easier.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+  getTargetMachine(), RVLocs, *DAG.getContext());
 
-  MachineFunction &MF = DAG.getMachineFunction();
-  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
-  MFI->setRetReg(reg);
+  CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
+
+  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign& VA  = RVLocs[i];
 
-  // If this is the first return lowered for this function, add the regs to the
-  // liveout set for the function
-  if (DAG.getMachineFunction().getRegInfo().liveout_empty())
-    DAG.getMachineFunction().getRegInfo().addLiveOut(reg);
+    assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
 
-  // Copy the result values into the output registers
-  Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag);
+    unsigned Reg = VA.getLocReg();
 
-  // Guarantee that all emitted copies are stuck together,
-  // avoiding something bad
-  Flag = Chain.getValue(1);
+    DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
 
-  return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+    Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+
+    // Guarantee that all emitted copies are stuck together,
+    // avoiding something bad
+    Flag = Chain.getValue(1);
+
+    MFI->addRetReg(Reg);
+  }
+
+  if (Flag.getNode() == 0) {
+    return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+  }
+  else {
+    return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+  }
 }
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index 225c0004a913..43185416e1fc 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -24,12 +24,13 @@ class PTXTargetMachine;
 namespace PTXISD {
   enum NodeType {
     FIRST_NUMBER = ISD::BUILTIN_OP_END,
-    READ_PARAM,
+    LOAD_PARAM,
+    STORE_PARAM,
     EXIT,
     RET,
     COPY_ADDRESS
   };
-} // namespace PTXISD
+}                               // namespace PTXISD
 
 class PTXTargetLowering : public TargetLowering {
   public:
@@ -40,7 +41,7 @@ class PTXTargetLowering : public TargetLowering {
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
     virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-    
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv,
@@ -58,9 +59,9 @@ class PTXTargetLowering : public TargetLowering {
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl,
                   SelectionDAG &DAG) const;
-    
+
     virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
-    
+
   private:
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
 }; // class PTXTargetLowering
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
index e4e099987e8d..8cee351ee0df 100644
--- a/lib/Target/PTX/PTXInstrFormats.td
+++ b/lib/Target/PTX/PTXInstrFormats.td
@@ -9,7 +9,7 @@
 
 // PTX Predicate operand, default to (0, 0) = (zero-reg, always).
 // Leave PrintMethod empty; predicate printing is defined elsewhere.
-def pred : PredicateOperand<OtherVT, (ops Preds, i32imm),
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
                                      (ops (i1 zero_reg), (i32 0))>;
 
 let Namespace = "PTX" in {
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index a12a6d01afa7..425265a2fdb7 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -18,27 +18,29 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
-using namespace llvm;
-
+#define GET_INSTRINFO_CTOR
 #include "PTXGenInstrInfo.inc"
 
+using namespace llvm;
+
 PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
-  : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)),
+  : PTXGenInstrInfo(),
     RI(_TM, *this), TM(_TM) {}
 
 static const struct map_entry {
   const TargetRegisterClass *cls;
   const int opcode;
 } map[] = {
-  { &PTX::RRegu16RegClass, PTX::MOVU16rr },
-  { &PTX::RRegu32RegClass, PTX::MOVU32rr },
-  { &PTX::RRegu64RegClass, PTX::MOVU64rr },
-  { &PTX::RRegf32RegClass, PTX::MOVF32rr },
-  { &PTX::RRegf64RegClass, PTX::MOVF64rr },
-  { &PTX::PredsRegClass,   PTX::MOVPREDrr }
+  { &PTX::RegI16RegClass, PTX::MOVU16rr },
+  { &PTX::RegI32RegClass, PTX::MOVU32rr },
+  { &PTX::RegI64RegClass, PTX::MOVU64rr },
+  { &PTX::RegF32RegClass, PTX::MOVF32rr },
+  { &PTX::RegF64RegClass, PTX::MOVF64rr },
+  { &PTX::RegPredRegClass,   PTX::MOVPREDrr }
 };
 
 void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -47,8 +49,8 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                bool KillSrc) const {
   for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
     if (map[i].cls->contains(DstReg, SrcReg)) {
-      const TargetInstrDesc &TID = get(map[i].opcode);
-      MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).
+      const MCInstrDesc &MCID = get(map[i].opcode);
+      MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
         addReg(SrcReg, getKillRegState(KillSrc));
       AddDefaultPredicate(MI);
       return;
@@ -69,8 +71,8 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
 
   for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
     if (DstRC == map[i].cls) {
-      const TargetInstrDesc &TID = get(map[i].opcode);
-      MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg);
+      const MCInstrDesc &MCID = get(map[i].opcode);
+      MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
       AddDefaultPredicate(MI);
       return true;
     }
@@ -155,7 +157,7 @@ DefinesPredicate(MachineInstr *MI,
 
   const MachineOperand &MO = MI->getOperand(0);
 
-  if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass)
+  if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
     return false;
 
   Pred.push_back(MO);
@@ -178,13 +180,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
 
   MachineBasicBlock::const_iterator iter = MBB.end();
   const MachineInstr& instLast1 = *--iter;
-  const TargetInstrDesc &desc1 = instLast1.getDesc();
+  const MCInstrDesc &desc1 = instLast1.getDesc();
   // for special case that MBB has only 1 instruction
   const bool IsSizeOne = MBB.size() == 1;
   // if IsSizeOne is true, *--iter and instLast2 are invalid
   // we put a dummy value in instLast2 and desc2 since they are used
   const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
-  const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+  const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
 
   DEBUG(dbgs() << "\n");
   DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -288,6 +290,77 @@ InsertBranch(MachineBasicBlock &MBB,
   }
 }
 
+// Spill SrcReg to stack slot FrameIdx with a STACKSTORE* instruction (a mov)
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MII,
+                                     unsigned SrcReg, bool isKill, int FrameIdx,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  // MII is the insertion point and may be MBB.end(); only dereference it
+  // when it refers to a real instruction.
+  DebugLoc DL;
+  if (MII != MBB.end()) {
+    DL = MII->getDebugLoc();
+    DEBUG(dbgs() << "storeRegToStackSlot: " << *MII);
+  }
+
+  // Select the store opcode matching the class of the spilled register
+  int OpCode;
+  if (RC == PTX::RegI16RegisterClass) {
+    OpCode = PTX::STACKSTOREI16;
+  } else if (RC == PTX::RegI32RegisterClass) {
+    OpCode = PTX::STACKSTOREI32;
+  } else if (RC == PTX::RegI64RegisterClass) {
+    OpCode = PTX::STACKSTOREI64;
+  } else if (RC == PTX::RegF32RegisterClass) {
+    OpCode = PTX::STACKSTOREF32;
+  } else if (RC == PTX::RegF64RegisterClass) {
+    OpCode = PTX::STACKSTOREF64;
+  } else {
+    llvm_unreachable("Unknown PTX register class!");
+  }
+
+  // Build the store (really a mov), keeping the caller's kill flag on SrcReg
+  MachineInstr *Store = BuildMI(MBB, MII, DL, get(OpCode))
+    .addFrameIndex(FrameIdx).addReg(SrcReg, getKillRegState(isKill));
+  AddDefaultPredicate(Store);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MII,
+                                        unsigned DestReg, int FrameIdx,
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
+  // Reloads DestReg from stack slot FrameIdx.  MII is the insertion point and
+  // may be MBB.end(); only dereference it when it is a real instruction.
+  DebugLoc DL;
+  if (MII != MBB.end()) {
+    DL = MII->getDebugLoc();
+    DEBUG(dbgs() << "loadRegFromStackSlot: " << *MII);
+  }
+
+  // Select the load opcode matching the class of the reloaded register
+  int OpCode;
+  if (RC == PTX::RegI16RegisterClass) {
+    OpCode = PTX::STACKLOADI16;
+  } else if (RC == PTX::RegI32RegisterClass) {
+    OpCode = PTX::STACKLOADI32;
+  } else if (RC == PTX::RegI64RegisterClass) {
+    OpCode = PTX::STACKLOADI64;
+  } else if (RC == PTX::RegF32RegisterClass) {
+    OpCode = PTX::STACKLOADF32;
+  } else if (RC == PTX::RegF64RegisterClass) {
+    OpCode = PTX::STACKLOADF64;
+  } else {
+    llvm_unreachable("Unknown PTX register class!");
+  }
+
+  // Build the load instruction (really a mov)
+  MachineInstr *Load = BuildMI(MBB, MII, DL, get(OpCode))
+    .addReg(DestReg).addFrameIndex(FrameIdx);
+  AddDefaultPredicate(Load);
+}
+
 // static helper routines
 
 MachineSDNode *PTXInstrInfo::
@@ -316,7 +389,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
 }
 
 bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
-  const TargetInstrDesc &desc = inst.getDesc();
+  const MCInstrDesc &desc = inst.getDesc();
   return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
 }
 
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index a04be7728f88..871f1ac8d376 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -17,6 +17,9 @@
 #include "PTXRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
 namespace llvm {
 class PTXTargetMachine;
 
@@ -24,7 +27,7 @@ class MachineSDNode;
 class SDValue;
 class SelectionDAG;
 
-class PTXInstrInfo : public TargetInstrInfoImpl {
+class PTXInstrInfo : public PTXGenInstrInfo {
 private:
   const PTXRegisterInfo RI;
   PTXTargetMachine &TM;
@@ -84,6 +87,29 @@ public:
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const;
 
+  // Memory operand folding for spills
+  // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+  //       loadRegFromStackSlot.  Doing so will get rid of the "stack" registers
+  //       we currently use to spill, though I doubt the overall effect on ptxas
+  //       output will be large.  I have yet to see a case where ptxas is unable
+  //       to see through the "stack" register usage and hence generates
+  //       efficient code anyway.
+  // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+  //                                             MachineInstr* MI,
+  //                                       const SmallVectorImpl<unsigned> &Ops,
+  //                                             int FrameIndex) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+                                   MachineBasicBlock::iterator MII,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass* RC,
+                                   const TargetRegisterInfo* TRI) const;
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MII,
+                                    unsigned DestReg, int FrameIdx,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
+
   // static helper routines
 
   static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index d5d08bed2247..6bfe906d40ab 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
 def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
 
 // Shader Model Support
-def SupportsSM13       : Predicate<"getSubtarget().supportsSM13()">;
-def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
-def SupportsSM20       : Predicate<"getSubtarget().supportsSM20()">;
-def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
+def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
+def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
+def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
+def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
 
 // PTX Version Support
 def SupportsPTX21       : Predicate<"getSubtarget().supportsPTX21()">;
@@ -143,11 +143,11 @@ def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
 // Address operands
 def MEMri32 : Operand<i32> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops RRegu32, i32imm);
+  let MIOperandInfo = (ops RegI32, i32imm);
 }
 def MEMri64 : Operand<i64> {
   let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops RRegu64, i64imm);
+  let MIOperandInfo = (ops RegI64, i64imm);
 }
 def MEMii32 : Operand<i32> {
   let PrintMethod = "printMemOperand";
@@ -163,6 +163,10 @@ def MEMpi : Operand<i32> {
   let PrintMethod = "printParamOperand";
   let MIOperandInfo = (ops i32imm);
 }
+def MEMret : Operand<i32> {
+  let PrintMethod = "printReturnOperand";
+  let MIOperandInfo = (ops i32imm);
+}
 
 // Branch & call targets have OtherVT type.
 def brtarget   : Operand<OtherVT>;
@@ -180,181 +184,190 @@ def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
 def PTXexit
   : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
 def PTXret
-  : SDNode<"PTXISD::RET",  SDTNone, [SDNPHasChain]>;
+  : SDNode<"PTXISD::RET",  SDTNone,
+           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 def PTXcopyaddress
   : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
 
+// Load/store .param space
+def PTXloadparam
+  : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+  : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction Class Templates
 //===----------------------------------------------------------------------===//
 
 //===- Floating-Point Instructions - 2 Operand Form -----------------------===//
 multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
-  def rr32 : InstPTX<(outs RRegf32:$d),
-                     (ins RRegf32:$a),
+  def rr32 : InstPTX<(outs RegF32:$d),
+                     (ins RegF32:$a),
                      !strconcat(opcstr, ".f32\t$d, $a"),
-                     [(set RRegf32:$d, (opnode RRegf32:$a))]>;
-  def ri32 : InstPTX<(outs RRegf32:$d),
+                     [(set RegF32:$d, (opnode RegF32:$a))]>;
+  def ri32 : InstPTX<(outs RegF32:$d),
                      (ins f32imm:$a),
                      !strconcat(opcstr, ".f32\t$d, $a"),
-                     [(set RRegf32:$d, (opnode fpimm:$a))]>;
-  def rr64 : InstPTX<(outs RRegf64:$d),
-                     (ins RRegf64:$a),
+                     [(set RegF32:$d, (opnode fpimm:$a))]>;
+  def rr64 : InstPTX<(outs RegF64:$d),
+                     (ins RegF64:$a),
                      !strconcat(opcstr, ".f64\t$d, $a"),
-                     [(set RRegf64:$d, (opnode RRegf64:$a))]>;
-  def ri64 : InstPTX<(outs RRegf64:$d),
+                     [(set RegF64:$d, (opnode RegF64:$a))]>;
+  def ri64 : InstPTX<(outs RegF64:$d),
                      (ins f64imm:$a),
                      !strconcat(opcstr, ".f64\t$d, $a"),
-                     [(set RRegf64:$d, (opnode fpimm:$a))]>;
+                     [(set RegF64:$d, (opnode fpimm:$a))]>;
 }
 
 //===- Floating-Point Instructions - 3 Operand Form -----------------------===//
 multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
-  def rr32 : InstPTX<(outs RRegf32:$d),
-                     (ins RRegf32:$a, RRegf32:$b),
+  def rr32 : InstPTX<(outs RegF32:$d),
+                     (ins RegF32:$a, RegF32:$b),
                      !strconcat(opcstr, ".f32\t$d, $a, $b"),
-                     [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
-  def ri32 : InstPTX<(outs RRegf32:$d),
-                     (ins RRegf32:$a, f32imm:$b),
+                     [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+  def ri32 : InstPTX<(outs RegF32:$d),
+                     (ins RegF32:$a, f32imm:$b),
                      !strconcat(opcstr, ".f32\t$d, $a, $b"),
-                     [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
-  def rr64 : InstPTX<(outs RRegf64:$d),
-                     (ins RRegf64:$a, RRegf64:$b),
+                     [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+  def rr64 : InstPTX<(outs RegF64:$d),
+                     (ins RegF64:$a, RegF64:$b),
                      !strconcat(opcstr, ".f64\t$d, $a, $b"),
-                     [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
-  def ri64 : InstPTX<(outs RRegf64:$d),
-                     (ins RRegf64:$a, f64imm:$b),
+                     [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+  def ri64 : InstPTX<(outs RegF64:$d),
+                     (ins RegF64:$a, f64imm:$b),
                      !strconcat(opcstr, ".f64\t$d, $a, $b"),
-                     [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
+                     [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
 }
 
 //===- Floating-Point Instructions - 4 Operand Form -----------------------===//
 multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
-  def rrr32 : InstPTX<(outs RRegf32:$d),
-                      (ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
+  def rrr32 : InstPTX<(outs RegF32:$d),
+                      (ins RegF32:$a, RegF32:$b, RegF32:$c),
                       !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
-                      [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
-                                                          RRegf32:$b),
-                                                 RRegf32:$c))]>;
-  def rri32 : InstPTX<(outs RRegf32:$d),
-                      (ins RRegf32:$a, RRegf32:$b, f32imm:$c),
+                      [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+                                                          RegF32:$b),
+                                                 RegF32:$c))]>;
+  def rri32 : InstPTX<(outs RegF32:$d),
+                      (ins RegF32:$a, RegF32:$b, f32imm:$c),
                       !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
-                      [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
-                                                          RRegf32:$b),
+                      [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+                                                          RegF32:$b),
                                                  fpimm:$c))]>;
-  def rrr64 : InstPTX<(outs RRegf64:$d),
-                      (ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
+  def rrr64 : InstPTX<(outs RegF64:$d),
+                      (ins RegF64:$a, RegF64:$b, RegF64:$c),
                       !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
-                      [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
-                                                          RRegf64:$b),
-                                                 RRegf64:$c))]>;
-  def rri64 : InstPTX<(outs RRegf64:$d),
-                      (ins RRegf64:$a, RRegf64:$b, f64imm:$c),
+                      [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+                                                          RegF64:$b),
+                                                 RegF64:$c))]>;
+  def rri64 : InstPTX<(outs RegF64:$d),
+                      (ins RegF64:$a, RegF64:$b, f64imm:$c),
                       !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
-                      [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
-                                                          RRegf64:$b),
+                      [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+                                                          RegF64:$b),
                                                  fpimm:$c))]>;
 }
 
 multiclass INT3<string opcstr, SDNode opnode> {
-  def rr16 : InstPTX<(outs RRegu16:$d),
-                     (ins RRegu16:$a, RRegu16:$b),
+  def rr16 : InstPTX<(outs RegI16:$d),
+                     (ins RegI16:$a, RegI16:$b),
                      !strconcat(opcstr, ".u16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
-  def ri16 : InstPTX<(outs RRegu16:$d),
-                     (ins RRegu16:$a, i16imm:$b),
+                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+  def ri16 : InstPTX<(outs RegI16:$d),
+                     (ins RegI16:$a, i16imm:$b),
                      !strconcat(opcstr, ".u16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
-  def rr32 : InstPTX<(outs RRegu32:$d),
-                     (ins RRegu32:$a, RRegu32:$b),
+                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+  def rr32 : InstPTX<(outs RegI32:$d),
+                     (ins RegI32:$a, RegI32:$b),
                      !strconcat(opcstr, ".u32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
-  def ri32 : InstPTX<(outs RRegu32:$d),
-                     (ins RRegu32:$a, i32imm:$b),
+                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+  def ri32 : InstPTX<(outs RegI32:$d),
+                     (ins RegI32:$a, i32imm:$b),
                      !strconcat(opcstr, ".u32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
-  def rr64 : InstPTX<(outs RRegu64:$d),
-                     (ins RRegu64:$a, RRegu64:$b),
+                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+  def rr64 : InstPTX<(outs RegI64:$d),
+                     (ins RegI64:$a, RegI64:$b),
                      !strconcat(opcstr, ".u64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
-  def ri64 : InstPTX<(outs RRegu64:$d),
-                     (ins RRegu64:$a, i64imm:$b),
+                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+  def ri64 : InstPTX<(outs RegI64:$d),
+                     (ins RegI64:$a, i64imm:$b),
                      !strconcat(opcstr, ".u64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
 }
 
 multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
-  def ripreds : InstPTX<(outs Preds:$d),
-                     (ins Preds:$a, i1imm:$b),
+  def ripreds : InstPTX<(outs RegPred:$d),
+                     (ins RegPred:$a, i1imm:$b),
                      !strconcat(opcstr, ".pred\t$d, $a, $b"),
-                     [(set Preds:$d, (opnode Preds:$a, imm:$b))]>;
-  def rrpreds : InstPTX<(outs Preds:$d),
-                     (ins Preds:$a, Preds:$b),
+                     [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
+  def rrpreds : InstPTX<(outs RegPred:$d),
+                     (ins RegPred:$a, RegPred:$b),
                      !strconcat(opcstr, ".pred\t$d, $a, $b"),
-                     [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>;
-  def rr16 : InstPTX<(outs RRegu16:$d),
-                     (ins RRegu16:$a, RRegu16:$b),
+                     [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
+  def rr16 : InstPTX<(outs RegI16:$d),
+                     (ins RegI16:$a, RegI16:$b),
                      !strconcat(opcstr, ".b16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
-  def ri16 : InstPTX<(outs RRegu16:$d),
-                     (ins RRegu16:$a, i16imm:$b),
+                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+  def ri16 : InstPTX<(outs RegI16:$d),
+                     (ins RegI16:$a, i16imm:$b),
                      !strconcat(opcstr, ".b16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
-  def rr32 : InstPTX<(outs RRegu32:$d),
-                     (ins RRegu32:$a, RRegu32:$b),
+                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+  def rr32 : InstPTX<(outs RegI32:$d),
+                     (ins RegI32:$a, RegI32:$b),
                      !strconcat(opcstr, ".b32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
-  def ri32 : InstPTX<(outs RRegu32:$d),
-                     (ins RRegu32:$a, i32imm:$b),
+                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+  def ri32 : InstPTX<(outs RegI32:$d),
+                     (ins RegI32:$a, i32imm:$b),
                      !strconcat(opcstr, ".b32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
-  def rr64 : InstPTX<(outs RRegu64:$d),
-                     (ins RRegu64:$a, RRegu64:$b),
+                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+  def rr64 : InstPTX<(outs RegI64:$d),
+                     (ins RegI64:$a, RegI64:$b),
                      !strconcat(opcstr, ".b64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
-  def ri64 : InstPTX<(outs RRegu64:$d),
-                     (ins RRegu64:$a, i64imm:$b),
+                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+  def ri64 : InstPTX<(outs RegI64:$d),
+                     (ins RegI64:$a, i64imm:$b),
                      !strconcat(opcstr, ".b64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
 }
 
 multiclass INT3ntnc<string opcstr, SDNode opnode> {
-  def rr16 : InstPTX<(outs RRegu16:$d),
-                     (ins RRegu16:$a, RRegu16:$b),
+  def rr16 : InstPTX<(outs RegI16:$d),
+                     (ins RegI16:$a, RegI16:$b),
                      !strconcat(opcstr, "16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
-  def rr32 : InstPTX<(outs RRegu32:$d),
-                     (ins RRegu32:$a, RRegu32:$b),
+                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+  def rr32 : InstPTX<(outs RegI32:$d),
+                     (ins RegI32:$a, RegI32:$b),
                      !strconcat(opcstr, "32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
-  def rr64 : InstPTX<(outs RRegu64:$d),
-                     (ins RRegu64:$a, RRegu64:$b),
+                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+  def rr64 : InstPTX<(outs RegI64:$d),
+                     (ins RegI64:$a, RegI64:$b),
                      !strconcat(opcstr, "64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
-  def ri16 : InstPTX<(outs RRegu16:$d),
-                     (ins RRegu16:$a, i16imm:$b),
+                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+  def ri16 : InstPTX<(outs RegI16:$d),
+                     (ins RegI16:$a, i16imm:$b),
                      !strconcat(opcstr, "16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
-  def ri32 : InstPTX<(outs RRegu32:$d),
-                     (ins RRegu32:$a, i32imm:$b),
+                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+  def ri32 : InstPTX<(outs RegI32:$d),
+                     (ins RegI32:$a, i32imm:$b),
                      !strconcat(opcstr, "32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
-  def ri64 : InstPTX<(outs RRegu64:$d),
-                     (ins RRegu64:$a, i64imm:$b),
+                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+  def ri64 : InstPTX<(outs RegI64:$d),
+                     (ins RegI64:$a, i64imm:$b),
                      !strconcat(opcstr, "64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
-  def ir16 : InstPTX<(outs RRegu16:$d),
-                     (ins i16imm:$a, RRegu16:$b),
+                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+  def ir16 : InstPTX<(outs RegI16:$d),
+                     (ins i16imm:$a, RegI16:$b),
                      !strconcat(opcstr, "16\t$d, $a, $b"),
-                     [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>;
-  def ir32 : InstPTX<(outs RRegu32:$d),
-                     (ins i32imm:$a, RRegu32:$b),
+                     [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
+  def ir32 : InstPTX<(outs RegI32:$d),
+                     (ins i32imm:$a, RegI32:$b),
                      !strconcat(opcstr, "32\t$d, $a, $b"),
-                     [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
-  def ir64 : InstPTX<(outs RRegu64:$d),
-                     (ins i64imm:$a, RRegu64:$b),
+                     [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
+  def ir64 : InstPTX<(outs RegI64:$d),
+                     (ins i64imm:$a, RegI64:$b),
                      !strconcat(opcstr, "64\t$d, $a, $b"),
-                     [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
+                     [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
 }
 
 multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
@@ -362,63 +375,63 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
   // TODO support 5-operand format: p|q, a, b, c
 
   def rr
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
               !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
-              [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>;
+              [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
   def ri
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
               !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
-              [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>;
+              [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
 
   def rr_and_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
   def ri_and_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
   def rr_or_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
   def ri_or_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
   def rr_xor_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
   def ri_xor_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
 
   def rr_and_not_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
   def ri_and_not_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
   def rr_or_not_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
   def ri_or_not_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
   def rr_xor_not_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
   def ri_xor_not_r
-    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
 }
 
 multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
@@ -426,74 +439,74 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
   // TODO support 5-operand format: p|q, a, b, c
 
   def rr_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
               !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
-              [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+              [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
   def rr_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
               !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
-              [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+              [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
 
   def rr_and_r_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
   def rr_and_r_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
 
   def rr_or_r_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
   def rr_or_r_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
 
   def rr_xor_r_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
   def rr_xor_r_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
-              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
 
   def rr_and_not_r_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
   def rr_and_not_r_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
 
   def rr_or_not_r_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
   def rr_or_not_r_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
 
   def rr_xor_not_r_u
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
   def rr_xor_not_r_o
-    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
               !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
-              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
 }
 
 multiclass PTX_SELP<RegisterClass RC, string regclsname> {
   def rr
-    : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c),
+    : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
               !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
-              [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>;
+              [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
 }
 
 multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
@@ -524,11 +537,11 @@ multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_lo
 }
 
 multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
-  defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
-  defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
-  defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
-  defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
-  defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
+  defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+  defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+  defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+  defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+  defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
 }
 
 multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
@@ -559,11 +572,11 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_st
 }
 
 multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
-  defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
-  defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
-  defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
-  defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
-  defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
+  defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+  defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+  defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+  defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+  defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -584,44 +597,59 @@ defm REM : INT3<"rem", urem>;
 defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
 
 // Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
-
-// TODO: Allow user selection of rounding modes for fdiv.
-// For division, we need to have f32 and f64 differently.
-// For f32, we just always use .approx since it is supported on all hardware
-// for PTX 1.4+, which is our minimum target.
-def FDIVrr32 : InstPTX<(outs RRegf32:$d),
-                       (ins RRegf32:$a, RRegf32:$b),
-                       "div.approx.f32\t$d, $a, $b",
-                       [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
-def FDIVri32 : InstPTX<(outs RRegf32:$d),
-                       (ins RRegf32:$a, f32imm:$b),
-                       "div.approx.f32\t$d, $a, $b",
-                       [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
-
-// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
-def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
-                           (ins RRegf64:$a, RRegf64:$b),
+defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
+
+// For floating-point division:
+// on SM13+ we default to .rn for both f32 and f64;
+// SM10 must *not* specify a rounding mode.
+
+// TODO:
+//     - Allow user selection of rounding modes for fdiv
+//     - Add support for -prec-div=false (.approx)
+
+def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
+                       (ins RegF32:$a, RegF32:$b),
+                       "div.rn.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+                   Requires<[FDivNeedsRoundingMode]>;
+def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
+                       (ins RegF32:$a, f32imm:$b),
+                       "div.rn.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+                   Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
+                       (ins RegF32:$a, RegF32:$b),
+                       "div.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+                   Requires<[FDivNoRoundingMode]>;
+def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
+                       (ins RegF32:$a, f32imm:$b),
+                       "div.f32\t$d, $a, $b",
+                       [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+                   Requires<[FDivNoRoundingMode]>;
+
+def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
+                           (ins RegF64:$a, RegF64:$b),
                            "div.rn.f64\t$d, $a, $b",
-                           [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
-                   Requires<[SupportsSM13]>;
-def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
-                           (ins RRegf64:$a, f64imm:$b),
+                           [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+                   Requires<[FDivNeedsRoundingMode]>;
+def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
+                           (ins RegF64:$a, f64imm:$b),
                            "div.rn.f64\t$d, $a, $b",
-                           [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
-                   Requires<[SupportsSM13]>;
-def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
-                           (ins RRegf64:$a, RRegf64:$b),
+                           [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+                   Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
+                           (ins RegF64:$a, RegF64:$b),
                            "div.f64\t$d, $a, $b",
-                           [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
-                   Requires<[DoesNotSupportSM13]>;
-def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
-                           (ins RRegf64:$a, f64imm:$b),
+                           [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+                   Requires<[FDivNoRoundingMode]>;
+def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
+                           (ins RegF64:$a, f64imm:$b),
                            "div.f64\t$d, $a, $b",
-                           [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
-                   Requires<[DoesNotSupportSM13]>;
+                           [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+                   Requires<[FDivNoRoundingMode]>;
 
 
 
@@ -633,40 +661,42 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
 // In the short term, mad is supported on all PTX versions and we use a
 // default rounding mode no matter what shader model or PTX version.
 // TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
+                Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
+            Requires<[FMadNoRoundingMode, SupportsFMA]>;
 
 ///===- Floating-Point Intrinsic Instructions -----------------------------===//
 
-def FSQRT32 : InstPTX<(outs RRegf32:$d),
-                      (ins RRegf32:$a),
+def FSQRT32 : InstPTX<(outs RegF32:$d),
+                      (ins RegF32:$a),
                       "sqrt.rn.f32\t$d, $a",
-                      [(set RRegf32:$d, (fsqrt RRegf32:$a))]>;
+                      [(set RegF32:$d, (fsqrt RegF32:$a))]>;
 
-def FSQRT64 : InstPTX<(outs RRegf64:$d),
-                      (ins RRegf64:$a),
+def FSQRT64 : InstPTX<(outs RegF64:$d),
+                      (ins RegF64:$a),
                       "sqrt.rn.f64\t$d, $a",
-                      [(set RRegf64:$d, (fsqrt RRegf64:$a))]>;
+                      [(set RegF64:$d, (fsqrt RegF64:$a))]>;
 
-def FSIN32 : InstPTX<(outs RRegf32:$d),
-                     (ins RRegf32:$a),
+def FSIN32 : InstPTX<(outs RegF32:$d),
+                     (ins RegF32:$a),
                      "sin.approx.f32\t$d, $a",
-                     [(set RRegf32:$d, (fsin RRegf32:$a))]>;
+                     [(set RegF32:$d, (fsin RegF32:$a))]>;
 
-def FSIN64 : InstPTX<(outs RRegf64:$d),
-                     (ins RRegf64:$a),
+def FSIN64 : InstPTX<(outs RegF64:$d),
+                     (ins RegF64:$a),
                      "sin.approx.f64\t$d, $a",
-                     [(set RRegf64:$d, (fsin RRegf64:$a))]>;
+                     [(set RegF64:$d, (fsin RegF64:$a))]>;
 
-def FCOS32 : InstPTX<(outs RRegf32:$d),
-                     (ins RRegf32:$a),
+def FCOS32 : InstPTX<(outs RegF32:$d),
+                     (ins RegF32:$a),
                      "cos.approx.f32\t$d, $a",
-                     [(set RRegf32:$d, (fcos RRegf32:$a))]>;
+                     [(set RegF32:$d, (fcos RegF32:$a))]>;
 
-def FCOS64 : InstPTX<(outs RRegf64:$d),
-                     (ins RRegf64:$a),
+def FCOS64 : InstPTX<(outs RegF64:$d),
+                     (ins RegF64:$a),
                      "cos.approx.f64\t$d, $a",
-                     [(set RRegf64:$d, (fcos RRegf64:$a))]>;
+                     [(set RegF64:$d, (fcos RegF64:$a))]>;
 
 
 ///===- Comparison and Selection Instructions -----------------------------===//
@@ -675,56 +705,68 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
 
 // Compare u16
 
-defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ,  "eq">;
-defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE,  "ne">;
-defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
-defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
-defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
-defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
+defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ,  "eq">;
+defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE,  "ne">;
+defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT,  "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE,  "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT,  "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE,  "ge">;
 
 // Compare u32
 
-defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ,  "eq">;
-defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE,  "ne">;
-defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
-defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
-defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
-defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
+defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ,  "eq">;
+defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE,  "ne">;
+defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT,  "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE,  "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT,  "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE,  "ge">;
 
 // Compare u64
 
-defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ,  "eq">;
-defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE,  "ne">;
-defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
-defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
-defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
-defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
+defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ,  "eq">;
+defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE,  "ne">;
+defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT,  "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE,  "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT,  "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE,  "ge">;
 
 // Compare f32
 
-defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
-defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
-defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
-defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
-defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
 
 // Compare f64
 
-defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
-defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
-defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
-defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
-defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
 
 // .selp
 
-defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">;
-defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">;
-defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">;
-defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">;
-defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">;
+defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
 
 ///===- Logic and Shift Instructions --------------------------------------===//
 
@@ -740,47 +782,47 @@ defm XOR : PTX_LOGIC<"xor", xor>;
 
 let neverHasSideEffects = 1 in {
   def MOVPREDrr
-    : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
+    : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
   def MOVU16rr
-    : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
+    : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
   def MOVU32rr
-    : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
+    : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
   def MOVU64rr
-    : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
+    : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
   def MOVF32rr
-    : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
+    : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
   def MOVF64rr
-    : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
+    : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
   def MOVPREDri
-    : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
-              [(set Preds:$d, imm:$a)]>;
+    : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+              [(set RegPred:$d, imm:$a)]>;
   def MOVU16ri
-    : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
-              [(set RRegu16:$d, imm:$a)]>;
+    : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+              [(set RegI16:$d, imm:$a)]>;
   def MOVU32ri
-    : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
-              [(set RRegu32:$d, imm:$a)]>;
+    : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+              [(set RegI32:$d, imm:$a)]>;
   def MOVU64ri
-    : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
-              [(set RRegu64:$d, imm:$a)]>;
+    : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+              [(set RegI64:$d, imm:$a)]>;
   def MOVF32ri
-    : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
-              [(set RRegf32:$d, fpimm:$a)]>;
+    : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+              [(set RegF32:$d, fpimm:$a)]>;
   def MOVF64ri
-    : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
-              [(set RRegf64:$d, fpimm:$a)]>;
+    : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+              [(set RegF64:$d, fpimm:$a)]>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
   def MOVaddr32
-    : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
-              [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+    : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+              [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
   def MOVaddr64
-    : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
-              [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+    : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+              [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
 }
 
 // Loads
@@ -789,17 +831,48 @@ defm LDc : PTX_LD_ALL<"ld.const",  load_constant>;
 defm LDl : PTX_LD_ALL<"ld.local",  load_local>;
 defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
 
-// This is a special instruction that is manually inserted for kernel parameters
-def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
-                      "ld.param.u16\t$d, [$a]", []>;
-def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
-                      "ld.param.u32\t$d, [$a]", []>;
-def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
-                      "ld.param.u64\t$d, [$a]", []>;
-def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
-                      "ld.param.f32\t$d, [$a]", []>;
-def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
-                      "ld.param.f64\t$d, [$a]", []>;
+// These instructions are used to load/store from the .param space for
+// device and kernel parameters
+
+let hasSideEffects = 1 in {
+  def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
+                         "ld.param.pred\t$d, [$a]",
+                         [(set RegPred:$d, (PTXloadparam timm:$a))]>;
+  def LDpiU16  : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
+                         "ld.param.u16\t$d, [$a]",
+                         [(set RegI16:$d, (PTXloadparam timm:$a))]>;
+  def LDpiU32  : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
+                         "ld.param.u32\t$d, [$a]",
+                         [(set RegI32:$d, (PTXloadparam timm:$a))]>;
+  def LDpiU64  : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
+                         "ld.param.u64\t$d, [$a]",
+                         [(set RegI64:$d, (PTXloadparam timm:$a))]>;
+  def LDpiF32  : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
+                         "ld.param.f32\t$d, [$a]",
+                         [(set RegF32:$d, (PTXloadparam timm:$a))]>;
+  def LDpiF64  : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
+                         "ld.param.f64\t$d, [$a]",
+                         [(set RegF64:$d, (PTXloadparam timm:$a))]>;
+
+  def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
+                         "st.param.pred\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegPred:$a)]>;
+  def STpiU16  : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
+                         "st.param.u16\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegI16:$a)]>;
+  def STpiU32  : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
+                         "st.param.u32\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegI32:$a)]>;
+  def STpiU64  : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
+                         "st.param.u64\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegI64:$a)]>;
+  def STpiF32  : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
+                         "st.param.f32\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegF32:$a)]>;
+  def STpiF64  : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
+                         "st.param.f64\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegF64:$a)]>;
+}
 
 // Stores
 defm STg : PTX_ST_ALL<"st.global", store_global>;
@@ -811,136 +884,174 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
 // TODO: Do something with st.param if/when it is needed.
 
 // Conversion to pred
-
+// PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero.  This follows
+// the C convention that any non-zero value is equivalent to 'true'.
 def CVT_pred_u16
-  : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
-            [(set Preds:$d, (trunc RRegu16:$a))]>;
+  : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI16:$a))]>;
 
 def CVT_pred_u32
-  : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
-            [(set Preds:$d, (trunc RRegu32:$a))]>;
+  : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI32:$a))]>;
 
 def CVT_pred_u64
-  : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
-            [(set Preds:$d, (trunc RRegu64:$a))]>;
+  : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI64:$a))]>;
 
 def CVT_pred_f32
-  : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a",
-            [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
+  : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
+            [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_pred_f64
-  : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a",
-            [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
+  : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
+            [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to u16
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+def CVT_u16_preda
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+            [(set RegI16:$d, (anyext RegPred:$a))]>;
 
 def CVT_u16_pred
-  : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
-            [(set RRegu16:$d, (zext Preds:$a))]>;
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+            [(set RegI16:$d, (zext RegPred:$a))]>;
+
+def CVT_u16_preds : InstPTX<(outs RegI16:$d), (ins RegPred:$a),
+                            "selp.u16\t$d, 0xffff, 0, $a",  // -1
+                            [(set RegI16:$d, (sext RegPred:$a))]>;
 
 def CVT_u16_u32
-  : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
-            [(set RRegu16:$d, (trunc RRegu32:$a))]>;
+  : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
+            [(set RegI16:$d, (trunc RegI32:$a))]>;
 
 def CVT_u16_u64
-  : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
-            [(set RRegu16:$d, (trunc RRegu64:$a))]>;
+  : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
+            [(set RegI16:$d, (trunc RegI64:$a))]>;
 
 def CVT_u16_f32
-  : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a",
-            [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
+  : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
+            [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_u16_f64
-  : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a",
-            [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
+  : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
+            [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to u32
 
 def CVT_u32_pred
-  : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
-            [(set RRegu32:$d, (zext Preds:$a))]>;
+  : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+            [(set RegI32:$d, (zext RegPred:$a))]>;
+
+def CVT_u32_b16
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+            [(set RegI32:$d, (anyext RegI16:$a))]>;
 
 def CVT_u32_u16
-  : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
-            [(set RRegu32:$d, (zext RRegu16:$a))]>;
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+            [(set RegI32:$d, (zext RegI16:$a))]>;
+
+def CVT_u32_preds : InstPTX<(outs RegI32:$d), (ins RegPred:$a),
+                            "selp.u32\t$d, 0xffffffff, 0, $a",  // -1
+                            [(set RegI32:$d, (sext RegPred:$a))]>;
+
+def CVT_u32_s16
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
+            [(set RegI32:$d, (sext RegI16:$a))]>;
 
 def CVT_u32_u64
-  : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
-            [(set RRegu32:$d, (trunc RRegu64:$a))]>;
+  : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
+            [(set RegI32:$d, (trunc RegI64:$a))]>;
 
 def CVT_u32_f32
-  : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a",
-            [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
+  : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
+            [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_u32_f64
-  : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a",
-            [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
+  : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
+            [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to u64
 
 def CVT_u64_pred
-  : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
-            [(set RRegu64:$d, (zext Preds:$a))]>;
+  : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+            [(set RegI64:$d, (zext RegPred:$a))]>;
+
+def CVT_u64_preds : InstPTX<(outs RegI64:$d), (ins RegPred:$a),
+                            "selp.u64\t$d, 0xffffffffffffffff, 0, $a",  // -1
+                            [(set RegI64:$d, (sext RegPred:$a))]>;
 
 def CVT_u64_u16
-  : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
-            [(set RRegu64:$d, (zext RRegu16:$a))]>;
+  : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
+            [(set RegI64:$d, (zext RegI16:$a))]>;
+
+def CVT_u64_s16
+  : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
+            [(set RegI64:$d, (sext RegI16:$a))]>;
 
 def CVT_u64_u32
-  : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
-            [(set RRegu64:$d, (zext RRegu32:$a))]>;
+  : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
+            [(set RegI64:$d, (zext RegI32:$a))]>;
+
+def CVT_u64_s32
+  : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
+            [(set RegI64:$d, (sext RegI32:$a))]>;
 
 def CVT_u64_f32
-  : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a",
-            [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
+  : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
+            [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
 
 def CVT_u64_f64
-  : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a",
-            [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
+  : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
+            [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
 
 // Conversion to f32
 
 def CVT_f32_pred
-  : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a",
-            [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
+  : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+            "selp.f32\t$d, 0F3F800000, 0F00000000, $a",  // 1.0
+            [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
 
 def CVT_f32_u16
-  : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a",
-            [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
+  : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
 
 def CVT_f32_u32
-  : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a",
-            [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
+  : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
 
 def CVT_f32_u64
-  : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a",
-            [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
+  : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
 
 def CVT_f32_f64
-  : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a",
-            [(set RRegf32:$d, (fround RRegf64:$a))]>;
+  : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
+            [(set RegF32:$d, (fround RegF64:$a))]>;
 
 // Conversion to f64
 
 def CVT_f64_pred
-  : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a",
-            [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
+  : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+            "selp.f64\t$d, 0D3FF0000000000000, 0D0000000000000000, $a",  // 1.0
+            [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
 
 def CVT_f64_u16
-  : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a",
-            [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
+  : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
 
 def CVT_f64_u32
-  : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a",
-            [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
+  : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
 
 def CVT_f64_u64
-  : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a",
-            [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
+  : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
 
 def CVT_f64_f32
-  : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
-            [(set RRegf64:$d, (fextend RRegf32:$a))]>;
+  : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
+            [(set RegF64:$d, (fextend RegF32:$a))]>;
 
 ///===- Control Flow Instructions -----------------------------------------===//
 
@@ -951,7 +1062,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
 
 let isBranch = 1, isTerminator = 1 in {
   // FIXME: The pattern part is blank because I cannot (or do not yet know
-  // how to) use the first operand of PredicateOperand (a Preds register) here
+  // how to) use the first operand of PredicateOperand (a RegPred register) here
   def BRAdp
     : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
               [/*(brcond pred:$_p, bb:$d)*/]>;
@@ -962,6 +1073,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
   def RET  : InstPTX<(outs), (ins), "ret",  [(PTXret)]>;
 }
 
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+                            "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+                            "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+                            "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+                            "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+                            "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+                           "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+                           "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+                           "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+                           "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+                           "mov.f64\t$d, s$a", []>;
+
 ///===- Intrinsic Instructions --------------------------------------------===//
 
 include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 320934a2228c..8d97909d339a 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -14,14 +14,14 @@
 // PTX Special Purpose Register Accessor Intrinsics
 
 class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
-  : InstPTX<(outs RRegu64:$d), (ins),
+  : InstPTX<(outs RegI64:$d), (ins),
             !strconcat("mov.u64\t$d, %", regname),
-            [(set RRegu64:$d, (intop))]>;
+            [(set RegI64:$d, (intop))]>;
 
 class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
-  : InstPTX<(outs RRegu32:$d), (ins),
+  : InstPTX<(outs RegI32:$d), (ins),
             !strconcat("mov.u32\t$d, %", regname),
-            [(set RRegu32:$d, (intop))]>;
+            [(set RegI32:$d, (intop))]>;
 
 // TODO Add read vector-version of special registers
 
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index 1574670b6e9b..b13a3dace130 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -23,7 +23,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
 
 using namespace llvm;
 
@@ -115,7 +114,8 @@ public:
 
   virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                         const MCSymbol *LastLabel,
-                                        const MCSymbol *Label);
+                                        const MCSymbol *Label,
+                                        unsigned PointerSize);
 
   virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
 
@@ -260,7 +260,8 @@ void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
 
 void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
                                                 const MCSymbol *LastLabel,
-                                                const MCSymbol *Label) {
+                                                const MCSymbol *Label,
+                                                unsigned PointerSize) {
   report_fatal_error("Unimplemented.");
 }
 
@@ -367,7 +368,7 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
     int64_t IntValue;
     if (!Value->EvaluateAsAbsolute(IntValue))
       report_fatal_error("Don't know how to emit this value.");
-    if (getContext().getTargetAsmInfo().isLittleEndian()) {
+    if (getContext().getAsmInfo().isLittleEndian()) {
       EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
       EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
     } else {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index c5e191007239..6fe9e6c3f657 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -54,8 +54,6 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
 
   DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
 
-  unsigned retreg = MFI->retReg();
-
   DEBUG(dbgs()
         << "PTX::NoRegister == " << PTX::NoRegister << "\n"
         << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
@@ -68,15 +66,13 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
   // FIXME: This is a slow linear scanning
   for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
     if (MRI.isPhysRegUsed(reg) &&
-        reg != retreg &&
+        !MFI->isRetReg(reg) &&
         (MFI->isKernel() || !MFI->isArgReg(reg)))
       MFI->addLocalVarReg(reg);
 
   // Notify MachineFunctionInfo that I've done adding local var reg
   MFI->doneAddLocalVar();
 
-  DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
-
   DEBUG(for (PTXMachineFunctionInfo::reg_iterator
              i = MFI->argRegBegin(), e = MFI->argRegEnd();
              i != e; ++i)
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 81df1c236cb2..9d65f5bd1ade 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -15,6 +15,7 @@
 #define PTX_MACHINE_FUNCTION_INFO_H
 
 #include "PTX.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
@@ -25,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo {
 private:
   bool is_kernel;
   std::vector<unsigned> reg_arg, reg_local_var;
-  unsigned reg_ret;
+  std::vector<unsigned> reg_ret;
   bool _isDoneAddArg;
 
 public:
@@ -39,19 +40,22 @@ public:
 
   void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
   void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
-  void setRetReg(unsigned reg) { reg_ret = reg; }
+  // Append reg to the return-register list unless it is already present.
+  void addRetReg(unsigned reg) {
+    if (isRetReg(reg)) return;
+    reg_ret.push_back(reg);
+  }
 
   void doneAddArg(void) {
     _isDoneAddArg = true;
   }
   void doneAddLocalVar(void) {}
 
-  bool isDoneAddArg(void) { return _isDoneAddArg; }
-
   bool isKernel() const { return is_kernel; }
 
   typedef std::vector<unsigned>::const_iterator         reg_iterator;
   typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
+  typedef std::vector<unsigned>::const_iterator         ret_iterator;
 
   bool         argRegEmpty() const { return reg_arg.empty(); }
   int          getNumArg() const { return reg_arg.size(); }
@@ -64,12 +68,19 @@ public:
   reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
   reg_iterator localVarRegEnd()   const { return reg_local_var.end(); }
 
-  unsigned retReg() const { return reg_ret; }
+  bool         retRegEmpty() const { return reg_ret.empty(); }
+  int          getNumRet() const { return reg_ret.size(); }
+  ret_iterator retRegBegin() const { return reg_ret.begin(); }
+  ret_iterator retRegEnd()   const { return reg_ret.end(); }
 
   bool isArgReg(unsigned reg) const {
     return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
   }
 
+  bool isRetReg(unsigned reg) const { // linear scan of reg_ret
+    return retRegEnd() != std::find(retRegBegin(), retRegEnd(), reg);
+  }
+
   bool isLocalVarReg(unsigned reg) const {
     return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
       != reg_local_var.end();
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 0f3e7bc2c3a7..cb56ea98a2b8 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -13,7 +13,39 @@
 
 #include "PTX.h"
 #include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
 
 using namespace llvm;
 
-#include "PTXGenRegisterInfo.inc"
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+                                 const TargetInstrInfo &TII)
+  : PTXGenRegisterInfo() { // TM and TII are not used by this constructor.
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                          int SPAdj,
+                                          RegScavenger *RS) const {
+  MachineInstr &Instr = *II;
+
+  // Locate the first frame-index operand.  The assert (when enabled) fires
+  // if the scan walks past the last operand without finding one.
+  unsigned OpNo = 0;
+  while (!Instr.getOperand(OpNo).isFI()) {
+    ++OpNo;
+    assert(OpNo < Instr.getNumOperands() &&
+           "Instr does not have a FrameIndex operand!");
+  }
+  const int FrameIndex = Instr.getOperand(OpNo).getIndex();
+
+  DEBUG(dbgs() << "eliminateFrameIndex: " << Instr);
+  DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+  DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+  // This index is post stack-slot re-use; emit it as an immediate operand.
+  Instr.getOperand(OpNo).ChangeToImmediate(FrameIndex);
+}
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index dc5635238106..0b63cb6d458e 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -17,7 +17,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/BitVector.h"
 
-#include "PTXGenRegisterInfo.h.inc"
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
 
 namespace llvm {
 class PTXTargetMachine;
@@ -25,7 +26,7 @@ class MachineFunction;
 
 struct PTXRegisterInfo : public PTXGenRegisterInfo {
   PTXRegisterInfo(PTXTargetMachine &TM,
-                  const TargetInstrInfo &TII) {}
+                  const TargetInstrInfo &TII);
 
   virtual const unsigned
     *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
@@ -38,11 +39,9 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
     return Reserved; // reserve no regs
   }
 
-  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
                                    int SPAdj,
-                                   RegScavenger *RS = NULL) const {
-    llvm_unreachable("PTX does not support general function call");
-  }
+                                   RegScavenger *RS = NULL) const;
 
   virtual unsigned getFrameRegister(const MachineFunction &MF) const {
     llvm_unreachable("PTX does not have a frame register");
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index f6161419fec1..1313d248325e 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -1,3 +1,4 @@
+
 //===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -21,16 +22,16 @@ class PTXReg<string n> : Register<n> {
 
 ///===- Predicate Registers -----------------------------------------------===//
 
-def P0  : PTXReg<"p0">;
-def P1  : PTXReg<"p1">;
-def P2  : PTXReg<"p2">;
-def P3  : PTXReg<"p3">;
-def P4  : PTXReg<"p4">;
-def P5  : PTXReg<"p5">;
-def P6  : PTXReg<"p6">;
-def P7  : PTXReg<"p7">;
-def P8  : PTXReg<"p8">;
-def P9  : PTXReg<"p9">;
+def P0 : PTXReg<"p0">;
+def P1 : PTXReg<"p1">;
+def P2 : PTXReg<"p2">;
+def P3 : PTXReg<"p3">;
+def P4 : PTXReg<"p4">;
+def P5 : PTXReg<"p5">;
+def P6 : PTXReg<"p6">;
+def P7 : PTXReg<"p7">;
+def P8 : PTXReg<"p8">;
+def P9 : PTXReg<"p9">;
 def P10 : PTXReg<"p10">;
 def P11 : PTXReg<"p11">;
 def P12 : PTXReg<"p12">;
@@ -85,19 +86,83 @@ def P60 : PTXReg<"p60">;
 def P61 : PTXReg<"p61">;
 def P62 : PTXReg<"p62">;
 def P63 : PTXReg<"p63">;
+def P64 : PTXReg<"p64">;
+def P65 : PTXReg<"p65">;
+def P66 : PTXReg<"p66">;
+def P67 : PTXReg<"p67">;
+def P68 : PTXReg<"p68">;
+def P69 : PTXReg<"p69">;
+def P70 : PTXReg<"p70">;
+def P71 : PTXReg<"p71">;
+def P72 : PTXReg<"p72">;
+def P73 : PTXReg<"p73">;
+def P74 : PTXReg<"p74">;
+def P75 : PTXReg<"p75">;
+def P76 : PTXReg<"p76">;
+def P77 : PTXReg<"p77">;
+def P78 : PTXReg<"p78">;
+def P79 : PTXReg<"p79">;
+def P80 : PTXReg<"p80">;
+def P81 : PTXReg<"p81">;
+def P82 : PTXReg<"p82">;
+def P83 : PTXReg<"p83">;
+def P84 : PTXReg<"p84">;
+def P85 : PTXReg<"p85">;
+def P86 : PTXReg<"p86">;
+def P87 : PTXReg<"p87">;
+def P88 : PTXReg<"p88">;
+def P89 : PTXReg<"p89">;
+def P90 : PTXReg<"p90">;
+def P91 : PTXReg<"p91">;
+def P92 : PTXReg<"p92">;
+def P93 : PTXReg<"p93">;
+def P94 : PTXReg<"p94">;
+def P95 : PTXReg<"p95">;
+def P96 : PTXReg<"p96">;
+def P97 : PTXReg<"p97">;
+def P98 : PTXReg<"p98">;
+def P99 : PTXReg<"p99">;
+def P100 : PTXReg<"p100">;
+def P101 : PTXReg<"p101">;
+def P102 : PTXReg<"p102">;
+def P103 : PTXReg<"p103">;
+def P104 : PTXReg<"p104">;
+def P105 : PTXReg<"p105">;
+def P106 : PTXReg<"p106">;
+def P107 : PTXReg<"p107">;
+def P108 : PTXReg<"p108">;
+def P109 : PTXReg<"p109">;
+def P110 : PTXReg<"p110">;
+def P111 : PTXReg<"p111">;
+def P112 : PTXReg<"p112">;
+def P113 : PTXReg<"p113">;
+def P114 : PTXReg<"p114">;
+def P115 : PTXReg<"p115">;
+def P116 : PTXReg<"p116">;
+def P117 : PTXReg<"p117">;
+def P118 : PTXReg<"p118">;
+def P119 : PTXReg<"p119">;
+def P120 : PTXReg<"p120">;
+def P121 : PTXReg<"p121">;
+def P122 : PTXReg<"p122">;
+def P123 : PTXReg<"p123">;
+def P124 : PTXReg<"p124">;
+def P125 : PTXReg<"p125">;
+def P126 : PTXReg<"p126">;
+def P127 : PTXReg<"p127">;
 
-///===- 16-bit Integer Registers ------------------------------------------===//
+///===- 16-Bit Registers --------------------------------------------------===//
 
-def RH0  : PTXReg<"rh0">;
-def RH1  : PTXReg<"rh1">;
-def RH2  : PTXReg<"rh2">;
-def RH3  : PTXReg<"rh3">;
-def RH4  : PTXReg<"rh4">;
-def RH5  : PTXReg<"rh5">;
-def RH6  : PTXReg<"rh6">;
-def RH7  : PTXReg<"rh7">;
-def RH8  : PTXReg<"rh8">;
-def RH9  : PTXReg<"rh9">;
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
 def RH10 : PTXReg<"rh10">;
 def RH11 : PTXReg<"rh11">;
 def RH12 : PTXReg<"rh12">;
@@ -152,20 +217,83 @@ def RH60 : PTXReg<"rh60">;
 def RH61 : PTXReg<"rh61">;
 def RH62 : PTXReg<"rh62">;
 def RH63 : PTXReg<"rh63">;
+def RH64 : PTXReg<"rh64">;
+def RH65 : PTXReg<"rh65">;
+def RH66 : PTXReg<"rh66">;
+def RH67 : PTXReg<"rh67">;
+def RH68 : PTXReg<"rh68">;
+def RH69 : PTXReg<"rh69">;
+def RH70 : PTXReg<"rh70">;
+def RH71 : PTXReg<"rh71">;
+def RH72 : PTXReg<"rh72">;
+def RH73 : PTXReg<"rh73">;
+def RH74 : PTXReg<"rh74">;
+def RH75 : PTXReg<"rh75">;
+def RH76 : PTXReg<"rh76">;
+def RH77 : PTXReg<"rh77">;
+def RH78 : PTXReg<"rh78">;
+def RH79 : PTXReg<"rh79">;
+def RH80 : PTXReg<"rh80">;
+def RH81 : PTXReg<"rh81">;
+def RH82 : PTXReg<"rh82">;
+def RH83 : PTXReg<"rh83">;
+def RH84 : PTXReg<"rh84">;
+def RH85 : PTXReg<"rh85">;
+def RH86 : PTXReg<"rh86">;
+def RH87 : PTXReg<"rh87">;
+def RH88 : PTXReg<"rh88">;
+def RH89 : PTXReg<"rh89">;
+def RH90 : PTXReg<"rh90">;
+def RH91 : PTXReg<"rh91">;
+def RH92 : PTXReg<"rh92">;
+def RH93 : PTXReg<"rh93">;
+def RH94 : PTXReg<"rh94">;
+def RH95 : PTXReg<"rh95">;
+def RH96 : PTXReg<"rh96">;
+def RH97 : PTXReg<"rh97">;
+def RH98 : PTXReg<"rh98">;
+def RH99 : PTXReg<"rh99">;
+def RH100 : PTXReg<"rh100">;
+def RH101 : PTXReg<"rh101">;
+def RH102 : PTXReg<"rh102">;
+def RH103 : PTXReg<"rh103">;
+def RH104 : PTXReg<"rh104">;
+def RH105 : PTXReg<"rh105">;
+def RH106 : PTXReg<"rh106">;
+def RH107 : PTXReg<"rh107">;
+def RH108 : PTXReg<"rh108">;
+def RH109 : PTXReg<"rh109">;
+def RH110 : PTXReg<"rh110">;
+def RH111 : PTXReg<"rh111">;
+def RH112 : PTXReg<"rh112">;
+def RH113 : PTXReg<"rh113">;
+def RH114 : PTXReg<"rh114">;
+def RH115 : PTXReg<"rh115">;
+def RH116 : PTXReg<"rh116">;
+def RH117 : PTXReg<"rh117">;
+def RH118 : PTXReg<"rh118">;
+def RH119 : PTXReg<"rh119">;
+def RH120 : PTXReg<"rh120">;
+def RH121 : PTXReg<"rh121">;
+def RH122 : PTXReg<"rh122">;
+def RH123 : PTXReg<"rh123">;
+def RH124 : PTXReg<"rh124">;
+def RH125 : PTXReg<"rh125">;
+def RH126 : PTXReg<"rh126">;
+def RH127 : PTXReg<"rh127">;
 
+///===- 32-Bit Registers --------------------------------------------------===//
 
-///===- 32-bit Integer Registers ------------------------------------------===//
-
-def R0  : PTXReg<"r0">;
-def R1  : PTXReg<"r1">;
-def R2  : PTXReg<"r2">;
-def R3  : PTXReg<"r3">;
-def R4  : PTXReg<"r4">;
-def R5  : PTXReg<"r5">;
-def R6  : PTXReg<"r6">;
-def R7  : PTXReg<"r7">;
-def R8  : PTXReg<"r8">;
-def R9  : PTXReg<"r9">;
+def R0 : PTXReg<"r0">;
+def R1 : PTXReg<"r1">;
+def R2 : PTXReg<"r2">;
+def R3 : PTXReg<"r3">;
+def R4 : PTXReg<"r4">;
+def R5 : PTXReg<"r5">;
+def R6 : PTXReg<"r6">;
+def R7 : PTXReg<"r7">;
+def R8 : PTXReg<"r8">;
+def R9 : PTXReg<"r9">;
 def R10 : PTXReg<"r10">;
 def R11 : PTXReg<"r11">;
 def R12 : PTXReg<"r12">;
@@ -220,20 +348,83 @@ def R60 : PTXReg<"r60">;
 def R61 : PTXReg<"r61">;
 def R62 : PTXReg<"r62">;
 def R63 : PTXReg<"r63">;
+def R64 : PTXReg<"r64">;
+def R65 : PTXReg<"r65">;
+def R66 : PTXReg<"r66">;
+def R67 : PTXReg<"r67">;
+def R68 : PTXReg<"r68">;
+def R69 : PTXReg<"r69">;
+def R70 : PTXReg<"r70">;
+def R71 : PTXReg<"r71">;
+def R72 : PTXReg<"r72">;
+def R73 : PTXReg<"r73">;
+def R74 : PTXReg<"r74">;
+def R75 : PTXReg<"r75">;
+def R76 : PTXReg<"r76">;
+def R77 : PTXReg<"r77">;
+def R78 : PTXReg<"r78">;
+def R79 : PTXReg<"r79">;
+def R80 : PTXReg<"r80">;
+def R81 : PTXReg<"r81">;
+def R82 : PTXReg<"r82">;
+def R83 : PTXReg<"r83">;
+def R84 : PTXReg<"r84">;
+def R85 : PTXReg<"r85">;
+def R86 : PTXReg<"r86">;
+def R87 : PTXReg<"r87">;
+def R88 : PTXReg<"r88">;
+def R89 : PTXReg<"r89">;
+def R90 : PTXReg<"r90">;
+def R91 : PTXReg<"r91">;
+def R92 : PTXReg<"r92">;
+def R93 : PTXReg<"r93">;
+def R94 : PTXReg<"r94">;
+def R95 : PTXReg<"r95">;
+def R96 : PTXReg<"r96">;
+def R97 : PTXReg<"r97">;
+def R98 : PTXReg<"r98">;
+def R99 : PTXReg<"r99">;
+def R100 : PTXReg<"r100">;
+def R101 : PTXReg<"r101">;
+def R102 : PTXReg<"r102">;
+def R103 : PTXReg<"r103">;
+def R104 : PTXReg<"r104">;
+def R105 : PTXReg<"r105">;
+def R106 : PTXReg<"r106">;
+def R107 : PTXReg<"r107">;
+def R108 : PTXReg<"r108">;
+def R109 : PTXReg<"r109">;
+def R110 : PTXReg<"r110">;
+def R111 : PTXReg<"r111">;
+def R112 : PTXReg<"r112">;
+def R113 : PTXReg<"r113">;
+def R114 : PTXReg<"r114">;
+def R115 : PTXReg<"r115">;
+def R116 : PTXReg<"r116">;
+def R117 : PTXReg<"r117">;
+def R118 : PTXReg<"r118">;
+def R119 : PTXReg<"r119">;
+def R120 : PTXReg<"r120">;
+def R121 : PTXReg<"r121">;
+def R122 : PTXReg<"r122">;
+def R123 : PTXReg<"r123">;
+def R124 : PTXReg<"r124">;
+def R125 : PTXReg<"r125">;
+def R126 : PTXReg<"r126">;
+def R127 : PTXReg<"r127">;
 
+///===- 64-Bit Registers --------------------------------------------------===//
 
-///===- 64-bit Integer Registers ------------------------------------------===//
-
-def RD0  : PTXReg<"rd0">;
-def RD1  : PTXReg<"rd1">;
-def RD2  : PTXReg<"rd2">;
-def RD3  : PTXReg<"rd3">;
-def RD4  : PTXReg<"rd4">;
-def RD5  : PTXReg<"rd5">;
-def RD6  : PTXReg<"rd6">;
-def RD7  : PTXReg<"rd7">;
-def RD8  : PTXReg<"rd8">;
-def RD9  : PTXReg<"rd9">;
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
 def RD10 : PTXReg<"rd10">;
 def RD11 : PTXReg<"rd11">;
 def RD12 : PTXReg<"rd12">;
@@ -288,204 +479,77 @@ def RD60 : PTXReg<"rd60">;
 def RD61 : PTXReg<"rd61">;
 def RD62 : PTXReg<"rd62">;
 def RD63 : PTXReg<"rd63">;
-
-
-///===- 32-bit Floating-Point Registers -----------------------------------===//
-
-def F0  : PTXReg<"f0">;
-def F1  : PTXReg<"f1">;
-def F2  : PTXReg<"f2">;
-def F3  : PTXReg<"f3">;
-def F4  : PTXReg<"f4">;
-def F5  : PTXReg<"f5">;
-def F6  : PTXReg<"f6">;
-def F7  : PTXReg<"f7">;
-def F8  : PTXReg<"f8">;
-def F9  : PTXReg<"f9">;
-def F10 : PTXReg<"f10">;
-def F11 : PTXReg<"f11">;
-def F12 : PTXReg<"f12">;
-def F13 : PTXReg<"f13">;
-def F14 : PTXReg<"f14">;
-def F15 : PTXReg<"f15">;
-def F16 : PTXReg<"f16">;
-def F17 : PTXReg<"f17">;
-def F18 : PTXReg<"f18">;
-def F19 : PTXReg<"f19">;
-def F20 : PTXReg<"f20">;
-def F21 : PTXReg<"f21">;
-def F22 : PTXReg<"f22">;
-def F23 : PTXReg<"f23">;
-def F24 : PTXReg<"f24">;
-def F25 : PTXReg<"f25">;
-def F26 : PTXReg<"f26">;
-def F27 : PTXReg<"f27">;
-def F28 : PTXReg<"f28">;
-def F29 : PTXReg<"f29">;
-def F30 : PTXReg<"f30">;
-def F31 : PTXReg<"f31">;
-def F32 : PTXReg<"f32">;
-def F33 : PTXReg<"f33">;
-def F34 : PTXReg<"f34">;
-def F35 : PTXReg<"f35">;
-def F36 : PTXReg<"f36">;
-def F37 : PTXReg<"f37">;
-def F38 : PTXReg<"f38">;
-def F39 : PTXReg<"f39">;
-def F40 : PTXReg<"f40">;
-def F41 : PTXReg<"f41">;
-def F42 : PTXReg<"f42">;
-def F43 : PTXReg<"f43">;
-def F44 : PTXReg<"f44">;
-def F45 : PTXReg<"f45">;
-def F46 : PTXReg<"f46">;
-def F47 : PTXReg<"f47">;
-def F48 : PTXReg<"f48">;
-def F49 : PTXReg<"f49">;
-def F50 : PTXReg<"f50">;
-def F51 : PTXReg<"f51">;
-def F52 : PTXReg<"f52">;
-def F53 : PTXReg<"f53">;
-def F54 : PTXReg<"f54">;
-def F55 : PTXReg<"f55">;
-def F56 : PTXReg<"f56">;
-def F57 : PTXReg<"f57">;
-def F58 : PTXReg<"f58">;
-def F59 : PTXReg<"f59">;
-def F60 : PTXReg<"f60">;
-def F61 : PTXReg<"f61">;
-def F62 : PTXReg<"f62">;
-def F63 : PTXReg<"f63">;
-
-
-///===- 64-bit Floating-Point Registers -----------------------------------===//
-
-def FD0  : PTXReg<"fd0">;
-def FD1  : PTXReg<"fd1">;
-def FD2  : PTXReg<"fd2">;
-def FD3  : PTXReg<"fd3">;
-def FD4  : PTXReg<"fd4">;
-def FD5  : PTXReg<"fd5">;
-def FD6  : PTXReg<"fd6">;
-def FD7  : PTXReg<"fd7">;
-def FD8  : PTXReg<"fd8">;
-def FD9  : PTXReg<"fd9">;
-def FD10 : PTXReg<"fd10">;
-def FD11 : PTXReg<"fd11">;
-def FD12 : PTXReg<"fd12">;
-def FD13 : PTXReg<"fd13">;
-def FD14 : PTXReg<"fd14">;
-def FD15 : PTXReg<"fd15">;
-def FD16 : PTXReg<"fd16">;
-def FD17 : PTXReg<"fd17">;
-def FD18 : PTXReg<"fd18">;
-def FD19 : PTXReg<"fd19">;
-def FD20 : PTXReg<"fd20">;
-def FD21 : PTXReg<"fd21">;
-def FD22 : PTXReg<"fd22">;
-def FD23 : PTXReg<"fd23">;
-def FD24 : PTXReg<"fd24">;
-def FD25 : PTXReg<"fd25">;
-def FD26 : PTXReg<"fd26">;
-def FD27 : PTXReg<"fd27">;
-def FD28 : PTXReg<"fd28">;
-def FD29 : PTXReg<"fd29">;
-def FD30 : PTXReg<"fd30">;
-def FD31 : PTXReg<"fd31">;
-def FD32 : PTXReg<"fd32">;
-def FD33 : PTXReg<"fd33">;
-def FD34 : PTXReg<"fd34">;
-def FD35 : PTXReg<"fd35">;
-def FD36 : PTXReg<"fd36">;
-def FD37 : PTXReg<"fd37">;
-def FD38 : PTXReg<"fd38">;
-def FD39 : PTXReg<"fd39">;
-def FD40 : PTXReg<"fd40">;
-def FD41 : PTXReg<"fd41">;
-def FD42 : PTXReg<"fd42">;
-def FD43 : PTXReg<"fd43">;
-def FD44 : PTXReg<"fd44">;
-def FD45 : PTXReg<"fd45">;
-def FD46 : PTXReg<"f4d6">;
-def FD47 : PTXReg<"fd47">;
-def FD48 : PTXReg<"fd48">;
-def FD49 : PTXReg<"fd49">;
-def FD50 : PTXReg<"fd50">;
-def FD51 : PTXReg<"fd51">;
-def FD52 : PTXReg<"fd52">;
-def FD53 : PTXReg<"fd53">;
-def FD54 : PTXReg<"fd54">;
-def FD55 : PTXReg<"fd55">;
-def FD56 : PTXReg<"fd56">;
-def FD57 : PTXReg<"fd57">;
-def FD58 : PTXReg<"fd58">;
-def FD59 : PTXReg<"fd59">;
-def FD60 : PTXReg<"fd60">;
-def FD61 : PTXReg<"fd61">;
-def FD62 : PTXReg<"fd62">;
-def FD63 : PTXReg<"fd63">;
-
+def RD64 : PTXReg<"rd64">;
+def RD65 : PTXReg<"rd65">;
+def RD66 : PTXReg<"rd66">;
+def RD67 : PTXReg<"rd67">;
+def RD68 : PTXReg<"rd68">;
+def RD69 : PTXReg<"rd69">;
+def RD70 : PTXReg<"rd70">;
+def RD71 : PTXReg<"rd71">;
+def RD72 : PTXReg<"rd72">;
+def RD73 : PTXReg<"rd73">;
+def RD74 : PTXReg<"rd74">;
+def RD75 : PTXReg<"rd75">;
+def RD76 : PTXReg<"rd76">;
+def RD77 : PTXReg<"rd77">;
+def RD78 : PTXReg<"rd78">;
+def RD79 : PTXReg<"rd79">;
+def RD80 : PTXReg<"rd80">;
+def RD81 : PTXReg<"rd81">;
+def RD82 : PTXReg<"rd82">;
+def RD83 : PTXReg<"rd83">;
+def RD84 : PTXReg<"rd84">;
+def RD85 : PTXReg<"rd85">;
+def RD86 : PTXReg<"rd86">;
+def RD87 : PTXReg<"rd87">;
+def RD88 : PTXReg<"rd88">;
+def RD89 : PTXReg<"rd89">;
+def RD90 : PTXReg<"rd90">;
+def RD91 : PTXReg<"rd91">;
+def RD92 : PTXReg<"rd92">;
+def RD93 : PTXReg<"rd93">;
+def RD94 : PTXReg<"rd94">;
+def RD95 : PTXReg<"rd95">;
+def RD96 : PTXReg<"rd96">;
+def RD97 : PTXReg<"rd97">;
+def RD98 : PTXReg<"rd98">;
+def RD99 : PTXReg<"rd99">;
+def RD100 : PTXReg<"rd100">;
+def RD101 : PTXReg<"rd101">;
+def RD102 : PTXReg<"rd102">;
+def RD103 : PTXReg<"rd103">;
+def RD104 : PTXReg<"rd104">;
+def RD105 : PTXReg<"rd105">;
+def RD106 : PTXReg<"rd106">;
+def RD107 : PTXReg<"rd107">;
+def RD108 : PTXReg<"rd108">;
+def RD109 : PTXReg<"rd109">;
+def RD110 : PTXReg<"rd110">;
+def RD111 : PTXReg<"rd111">;
+def RD112 : PTXReg<"rd112">;
+def RD113 : PTXReg<"rd113">;
+def RD114 : PTXReg<"rd114">;
+def RD115 : PTXReg<"rd115">;
+def RD116 : PTXReg<"rd116">;
+def RD117 : PTXReg<"rd117">;
+def RD118 : PTXReg<"rd118">;
+def RD119 : PTXReg<"rd119">;
+def RD120 : PTXReg<"rd120">;
+def RD121 : PTXReg<"rd121">;
+def RD122 : PTXReg<"rd122">;
+def RD123 : PTXReg<"rd123">;
+def RD124 : PTXReg<"rd124">;
+def RD125 : PTXReg<"rd125">;
+def RD126 : PTXReg<"rd126">;
+def RD127 : PTXReg<"rd127">;
 
 //===----------------------------------------------------------------------===//
 //  Register classes
 //===----------------------------------------------------------------------===//
-
-def Preds : RegisterClass<"PTX", [i1], 8,
-                          [P0, P1, P2, P3, P4, P5, P6, P7,
-                           P8, P9, P10, P11, P12, P13, P14, P15,
-                           P16, P17, P18, P19, P20, P21, P22, P23,
-                           P24, P25, P26, P27, P28, P29, P30, P31,
-                           P32, P33, P34, P35, P36, P37, P38, P39,
-                           P40, P41, P42, P43, P44, P45, P46, P47,
-                           P48, P49, P50, P51, P52, P53, P54, P55,
-                           P56, P57, P58, P59, P60, P61, P62, P63]>;
-
-def RRegu16 : RegisterClass<"PTX", [i16], 16,
-                            [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
-                             RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
-                             RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
-                             RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31,
-                             RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39,
-                             RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47,
-                             RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55,
-                             RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>;
-
-def RRegu32 : RegisterClass<"PTX", [i32], 32,
-                            [R0, R1, R2, R3, R4, R5, R6, R7,
-                             R8, R9, R10, R11, R12, R13, R14, R15,
-                             R16, R17, R18, R19, R20, R21, R22, R23,
-                             R24, R25, R26, R27, R28, R29, R30, R31,
-                             R32, R33, R34, R35, R36, R37, R38, R39,
-                             R40, R41, R42, R43, R44, R45, R46, R47,
-                             R48, R49, R50, R51, R52, R53, R54, R55,
-                             R56, R57, R58, R59, R60, R61, R62, R63]>;
-
-def RRegu64 : RegisterClass<"PTX", [i64], 64,
-                            [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
-                             RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
-                             RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
-                             RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31,
-                             RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39,
-                             RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47,
-                             RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55,
-                             RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>;
-
-def RRegf32 : RegisterClass<"PTX", [f32], 32,
-                            [F0, F1, F2, F3, F4, F5, F6, F7,
-                             F8, F9, F10, F11, F12, F13, F14, F15,
-                             F16, F17, F18, F19, F20, F21, F22, F23,
-                             F24, F25, F26, F27, F28, F29, F30, F31,
-                             F32, F33, F34, F35, F36, F37, F38, F39,
-                             F40, F41, F42, F43, F44, F45, F46, F47,
-                             F48, F49, F50, F51, F52, F53, F54, F55,
-                             F56, F57, F58, F59, F60, F61, F62, F63]>;
-
-def RRegf64 : RegisterClass<"PTX", [f64], 64,
-                            [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
-                             FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
-                             FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
-                             FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31,
-                             FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39,
-                             FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47,
-                             FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55,
-                             FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>;
+def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index e8a1dfecd00c..8ec646e46f68 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -7,32 +7,51 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the PTX specific subclass of TargetSubtarget.
+// This file implements the PTX specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "PTXSubtarget.h"
+#include "PTX.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
 
 using namespace llvm;
 
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
-                           bool is64Bit)
-  : PTXShaderModel(PTX_SM_1_0),
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS, bool is64Bit)
+  : PTXGenSubtargetInfo(TT, CPU, FS),
+    PTXTarget(PTX_COMPUTE_1_0),
     PTXVersion(PTX_VERSION_2_0),
     SupportsDouble(false),
     SupportsFMA(true),
-    Is64Bit(is64Bit) {	
-  std::string TARGET = "generic";
-  ParseSubtargetFeatures(FS, TARGET);
+    Is64Bit(is64Bit) {
+  std::string TARGET = CPU;
+  if (TARGET.empty())
+    TARGET = "generic";
+  ParseSubtargetFeatures(TARGET, FS);
 }
 
 std::string PTXSubtarget::getTargetString() const {
-  switch(PTXShaderModel) {
-    default: llvm_unreachable("Unknown shader model");
+  switch(PTXTarget) {
+    default: llvm_unreachable("Unknown PTX target");
     case PTX_SM_1_0: return "sm_10";
+    case PTX_SM_1_1: return "sm_11";
+    case PTX_SM_1_2: return "sm_12";
     case PTX_SM_1_3: return "sm_13";
     case PTX_SM_2_0: return "sm_20";
+    case PTX_SM_2_1: return "sm_21";
+    case PTX_SM_2_2: return "sm_22";
+    case PTX_SM_2_3: return "sm_23";
+    case PTX_COMPUTE_1_0: return "compute_10";
+    case PTX_COMPUTE_1_1: return "compute_11";
+    case PTX_COMPUTE_1_2: return "compute_12";
+    case PTX_COMPUTE_1_3: return "compute_13";
+    case PTX_COMPUTE_2_0: return "compute_20";
   }
 }
 
@@ -45,5 +64,3 @@ std::string PTXSubtarget::getPTXVersionString() const {
     case PTX_VERSION_2_3: return "2.3";
   }
 }
-
-#include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 59fa6965bbac..0921f1f22c49 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -7,26 +7,44 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the PTX specific subclass of TargetSubtarget.
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef PTX_SUBTARGET_H
 #define PTX_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
 
 namespace llvm {
-  class PTXSubtarget : public TargetSubtarget {
-    private:
+class StringRef;
+
+  class PTXSubtarget : public PTXGenSubtargetInfo {
+    public:
 
       /**
        * Enumeration of Shader Models supported by the back-end.
        */
-      enum PTXShaderModelEnum {
+      enum PTXTargetEnum {
+        PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
+        PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
+        PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
+        PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
+        PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
+        PTX_LAST_COMPUTE,
+
         PTX_SM_1_0, /*< Shader Model 1.0 */
+        PTX_SM_1_1, /*< Shader Model 1.1 */
+        PTX_SM_1_2, /*< Shader Model 1.2 */
         PTX_SM_1_3, /*< Shader Model 1.3 */
-        PTX_SM_2_0  /*< Shader Model 2.0 */
+        PTX_SM_2_0, /*< Shader Model 2.0 */
+        PTX_SM_2_1, /*< Shader Model 2.1 */
+        PTX_SM_2_2, /*< Shader Model 2.2 */
+        PTX_SM_2_3, /*< Shader Model 2.3 */
+        PTX_LAST_SM
       };
 
       /**
@@ -41,24 +59,30 @@ namespace llvm {
         PTX_VERSION_2_3   /*< PTX Version 2.3 */
       };
 
+  private:
+
       /// Shader Model supported on the target GPU.
-      PTXShaderModelEnum PTXShaderModel;
+      PTXTargetEnum PTXTarget;
 
       /// PTX Language Version.
       PTXVersionEnum PTXVersion;
 
       // The native .f64 type is supported on the hardware.
       bool SupportsDouble;
-      
-      // Support the fused-multiply add (FMA) and multiply-add (MAD) instructions
+
+      // Support the fused-multiply add (FMA) and multiply-add (MAD)
+      // instructions
       bool SupportsFMA;
-      
+
       // Use .u64 instead of .u32 for addresses.
       bool Is64Bit;
 
     public:
-      PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
 
+      PTXSubtarget(const std::string &TT, const std::string &CPU,
+                   const std::string &FS, bool is64Bit);
+
+      // Target architecture accessors
       std::string getTargetString() const;
 
       std::string getPTXVersionString() const;
@@ -68,10 +92,6 @@ namespace llvm {
       bool is64Bit() const { return Is64Bit; }
 
       bool supportsFMA() const { return SupportsFMA; }
-      
-      bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
-
-      bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
 
       bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
 
@@ -79,8 +99,22 @@ namespace llvm {
 
       bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
 
-      std::string ParseSubtargetFeatures(const std::string &FS,
-                                         const std::string &CPU);
+      bool fdivNeedsRoundingMode() const { // compute_13+ or sm_13+ targets
+        return (PTX_COMPUTE_1_3 <= PTXTarget && PTXTarget < PTX_LAST_COMPUTE) ||
+               (PTX_SM_1_3 <= PTXTarget && PTXTarget < PTX_LAST_SM);
+      }
+
+      bool fmadNeedsRoundingMode() const { // compute_13+ or sm_13+ targets
+        return (PTX_COMPUTE_1_3 <= PTXTarget && PTXTarget < PTX_LAST_COMPUTE) ||
+               (PTX_SM_1_3 <= PTXTarget && PTXTarget < PTX_LAST_SM);
+      }
+
+      bool useParamSpaceForDeviceArgs() const { // compute_20+ / sm_20+
+        return (PTX_COMPUTE_2_0 <= PTXTarget && PTXTarget < PTX_LAST_COMPUTE) ||
+               (PTX_SM_2_0 <= PTXTarget && PTXTarget < PTX_LAST_SM);
+      }
+
+    void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   }; // class PTXSubtarget
 } // namespace llvm
 
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 1b737c9d8634..ab926e02d66f 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "PTX.h"
-#include "PTXMCAsmInfo.h"
 #include "PTXTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -35,9 +34,6 @@ extern "C" void LLVMInitializePTXTarget() {
   RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
   RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
 
-  RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target);
-  RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target);
-
   TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
   TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
 }
@@ -52,11 +48,12 @@ namespace {
 // DataLayout and FrameLowering are filled with dummy data
 PTXTargetMachine::PTXTargetMachine(const Target &T,
                                    const std::string &TT,
+                                   const std::string &CPU,
                                    const std::string &FS,
                                    bool is64Bit)
-  : LLVMTargetMachine(T, TT),
+  : LLVMTargetMachine(T, TT, CPU, FS),
     DataLayout(is64Bit ? DataLayout64 : DataLayout32),
-    Subtarget(TT, FS, is64Bit),
+    Subtarget(TT, CPU, FS, is64Bit),
     FrameLowering(Subtarget),
     InstrInfo(*this),
     TLInfo(*this) {
@@ -64,14 +61,16 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
 
 PTX32TargetMachine::PTX32TargetMachine(const Target &T,
                                        const std::string& TT,
+                                       const std::string& CPU,
                                        const std::string& FS)
-  : PTXTargetMachine(T, TT, FS, false) {
+  : PTXTargetMachine(T, TT, CPU, FS, false) {
 }
 
 PTX64TargetMachine::PTX64TargetMachine(const Target &T,
                                        const std::string& TT,
+                                       const std::string& CPU,
                                        const std::string& FS)
-  : PTXTargetMachine(T, TT, FS, true) {
+  : PTXTargetMachine(T, TT, CPU, FS, true) {
 }
 
 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 149be8e3b7e9..ae4215325211 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -33,7 +33,8 @@ class PTXTargetMachine : public LLVMTargetMachine {
 
   public:
     PTXTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS, bool is64Bit);
+                     const std::string &CPU, const std::string &FS,
+                     bool is64Bit);
 
     virtual const TargetData *getTargetData() const { return &DataLayout; }
 
@@ -61,14 +62,14 @@ class PTX32TargetMachine : public PTXTargetMachine {
 public:
 
   PTX32TargetMachine(const Target &T, const std::string &TT,
-                     const std::string& FS);
+                     const std::string& CPU, const std::string& FS);
 }; // class PTX32TargetMachine
 
 class PTX64TargetMachine : public PTXTargetMachine {
 public:
 
   PTX64TargetMachine(const Target &T, const std::string &TT,
-                     const std::string& FS);
+                     const std::string& CPU, const std::string& FS);
 }; // class PTX32TargetMachine
 
 } // namespace llvm
diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py
new file mode 100755
index 000000000000..15286908961d
--- /dev/null
+++ b/lib/Target/PTX/generate-register-td.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+##===- generate-register-td.py --------------------------------*-python-*--===##
+##
+##                     The LLVM Compiler Infrastructure
+##
+## This file is distributed under the University of Illinois Open Source
+## License. See LICENSE.TXT for details.
+##
+##===----------------------------------------------------------------------===##
+##
+## This file describes the PTX register file generator.
+##
+##===----------------------------------------------------------------------===##
+
+from sys import argv, exit, stdout
+
+
+if len(argv) != 5:
+    print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
+    exit(1)
+# Parse the four register counts; int() raises ValueError on non-numeric text.
+try:
+    num_pred  = int(argv[1])
+    num_16bit = int(argv[2])
+    num_32bit = int(argv[3])
+    num_64bit = int(argv[4])
+except ValueError:
+    print('ERROR: Invalid integer parameter')
+    exit(1)
+
+## Print the register definition file
+td_file = open('PTXRegisterInfo.td', 'w')
+
+td_file.write('''
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+  let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+//  Registers
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print predicate registers
+td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
+for r in range(0, num_pred):
+    td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
+
+# Print 16-bit registers
+td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_16bit):
+    td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
+
+# Print 32-bit registers
+td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_32bit):
+    td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
+
+# Print 64-bit registers
+td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_64bit):
+    td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
+
+
+td_file.write('''
+//===----------------------------------------------------------------------===//
+//  Register classes
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print register classes
+
+td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
+td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
+td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+
+
+td_file.close()
+
+## Now write the PTXCallingConv.td file
+td_file = open('PTXCallingConv.td', 'w')
+
+# Reserve 10% of the available registers for return values, and the other 90%
+# for parameters
+num_ret_pred    = int(0.1 * num_pred)
+num_ret_16bit   = int(0.1 * num_16bit)
+num_ret_32bit   = int(0.1 * num_32bit)
+num_ret_64bit   = int(0.1 * num_64bit)
+num_param_pred  = num_pred - num_ret_pred
+num_param_16bit = num_16bit - num_ret_16bit
+num_param_32bit = num_32bit - num_ret_32bit
+num_param_64bit = num_64bit - num_ret_64bit
+
+param_regs_pred  = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
+ret_regs_pred    = ['P%d' % i for i in range(0, num_ret_pred)]
+param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
+ret_regs_16bit   = ['RH%d' % i for i in range(0, num_ret_16bit)]
+param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
+ret_regs_32bit   = ['R%d' % i for i in range(0, num_ret_32bit)]
+param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
+ret_regs_64bit   = ['RD%d' % i for i in range(0, num_ret_64bit)]
+# Comma-join each register list; unlike reduce(), join accepts an empty list.
+param_list_pred  = ', '.join(param_regs_pred)
+ret_list_pred    = ', '.join(ret_regs_pred)
+param_list_16bit = ', '.join(param_regs_16bit)
+ret_list_16bit   = ', '.join(ret_regs_16bit)
+param_list_32bit = ', '.join(param_regs_32bit)
+ret_list_32bit   = ', '.join(ret_regs_32bit)
+param_list_64bit = ', '.join(param_regs_64bit)
+ret_list_64bit   = ', '.join(ret_regs_64bit)
+
+td_file.write('''
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+  CCIfType<[i1],      CCAssignToReg<[%s]>>,
+  CCIfType<[i16],     CCAssignToReg<[%s]>>,
+  CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+  CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+  CCIfType<[i1],      CCAssignToReg<[%s]>>,
+  CCIfType<[i16],     CCAssignToReg<[%s]>>,
+  CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+  CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
+       ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
+
+
+td_file.close()
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index f28257999d1b..d1dda3716c4a 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,16 +1,13 @@
 set(LLVM_TARGET_DEFINITIONS PPC.td)
 
-tablegen(PPCGenInstrNames.inc -gen-instr-enums)
-tablegen(PPCGenRegisterNames.inc -gen-register-enums)
 tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
 tablegen(PPCGenCodeEmitter.inc -gen-emitter)
 tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PPCGenRegisterInfo.inc -gen-register-desc)
-tablegen(PPCGenInstrInfo.inc -gen-instr-desc)
+tablegen(PPCGenRegisterInfo.inc -gen-register-info)
+tablegen(PPCGenInstrInfo.inc -gen-instr-info)
 tablegen(PPCGenDAGISel.inc -gen-dag-isel)
 tablegen(PPCGenCallingConv.inc -gen-callingconv)
-tablegen(PPCGenSubtarget.inc -gen-subtarget)
+tablegen(PPCGenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(PowerPCCodeGen
   PPCAsmBackend.cpp
@@ -23,7 +20,6 @@ add_llvm_target(PowerPCCodeGen
   PPCISelLowering.cpp
   PPCFrameLowering.cpp
   PPCJITInfo.cpp
-  PPCMCAsmInfo.cpp
   PPCMCCodeEmitter.cpp
   PPCMCInstLower.cpp
   PPCPredicates.cpp
@@ -35,3 +31,4 @@ add_llvm_target(PowerPCCodeGen
 
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index adfa0aa6306b..d022a4496e84 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -19,14 +19,12 @@
 namespace llvm {
 
 class MCOperand;
-class TargetMachine;
 
 class PPCInstPrinter : public MCInstPrinter {
   // 0 -> AIX, 1 -> Darwin.
   unsigned SyntaxVariant;
 public:
-  PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI,
-                 unsigned syntaxVariant)
+  PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
     : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
   
   bool isDarwinSyntax() const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..a1b81662115a
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPowerPCDesc
+  PPCMCTargetDesc.cpp
+  PPCMCAsmInfo.cpp
+  )
diff --git a/lib/Target/PowerPC/MCTargetDesc/Makefile b/lib/Target/PowerPC/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..9db66622cced
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2d5c8809ba9f..b6dca835b18d 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -15,6 +15,10 @@
 using namespace llvm;
 
 PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+  if (is64Bit)
+    PointerSize = 8;
+  IsLittleEndian = false;
+
   PCSymbol = ".";
   CommentString = ";";
   ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 96ae6fbba0e4..96ae6fbba0e4 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 000000000000..02b887f4d5dc
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,70 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "PPCMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitPPCMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+  TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                 StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+                                          createPPCMCSubtargetInfo);
+  TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+                                          createPPCMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+  Triple TheTriple(TT);
+  bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+  if (TheTriple.isOSDarwin())
+    return new PPCMCAsmInfoDarwin(isPPC64);
+  return new PPCLinuxMCAsmInfo(isPPC64);
+  
+}
+
+extern "C" void LLVMInitializePowerPCMCAsmInfo() {
+  RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
+  RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo);  
+}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 000000000000..cee235097a0a
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+  
+} // End llvm namespace
+
+// Defines symbolic names for PowerPC registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 030defe212c0..1617b26ca4a5 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMPowerPCCodeGen
 TARGET = PPC
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
+BUILT_SOURCES = PPCGenRegisterInfo.inc \
                 PPCGenAsmWriter.inc  PPCGenCodeEmitter.inc \
-                PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \
                 PPCGenInstrInfo.inc PPCGenDAGISel.inc \
-                PPCGenSubtarget.inc PPCGenCallingConv.inc \
+                PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
                 PPCGenMCCodeEmitter.inc
 
-DIRS = InstPrinter TargetInfo
+DIRS = InstPrinter TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 92672b5b172b..7191dd105f3c 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TARGET_POWERPC_H
 #define LLVM_TARGET_POWERPC_H
 
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include <string>
 
 // GCC #defines PPC on Linux but we use it as our namespace name
@@ -31,6 +32,8 @@ namespace llvm {
   class MCInst;
   class MCCodeEmitter;
   class MCContext;
+  class MCInstrInfo;
+  class MCSubtargetInfo;
   class TargetMachine;
   class TargetAsmBackend;
   
@@ -38,16 +41,14 @@ namespace llvm {
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
                                             JITCodeEmitter &MCE);
-  MCCodeEmitter *createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+  MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+                                        const MCSubtargetInfo &STI,
                                         MCContext &Ctx);
   TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
   
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP, bool isDarwin);
   
-  extern Target ThePPC32Target;
-  extern Target ThePPC64Target;
-  
   namespace PPCII {
     
   /// Target Operand Flag enum.
@@ -81,13 +82,4 @@ namespace llvm {
   
 } // end namespace llvm;
 
-// Defines symbolic names for PowerPC registers.  This defines a mapping from
-// register name to register number.
-//
-#include "PPCGenRegisterNames.inc"
-
-// Defines symbolic names for the PowerPC instructions.
-//
-#include "PPCGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp
index f562a3f4f9e8..4b8cbb711833 100644
--- a/lib/Target/PowerPC/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -13,6 +13,7 @@
 #include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
@@ -23,6 +24,11 @@ public:
   PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
                       uint32_t CPUSubtype)
     : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+  void RecordRelocation(MachObjectWriter *Writer,
+                        const MCAssembler &Asm, const MCAsmLayout &Layout,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
+                        MCValue Target, uint64_t &FixedValue) {}
 };
 
 class PPCAsmBackend : public TargetAsmBackend {
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b795db9594ff..9de2200296e8 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -680,10 +680,9 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
 }
 
 static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
-                                             TargetMachine &TM,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
-  return new PPCInstPrinter(TM, MAI, SyntaxVariant);
+  return new PPCInstPrinter(MAI, SyntaxVariant);
 }
 
 
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 74ecff5af620..cddc9d858adf 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -73,12 +73,12 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
   }
   Opcode = ~Opcode;
 
-  const TargetInstrDesc &TID = TII.get(Opcode);
+  const MCInstrDesc &MCID = TII.get(Opcode);
 
-  isLoad  = TID.mayLoad();
-  isStore = TID.mayStore();
+  isLoad  = MCID.mayLoad();
+  isStore = MCID.mayStore();
 
-  uint64_t TSFlags = TID.TSFlags;
+  uint64_t TSFlags = MCID.TSFlags;
 
   isFirst   = TSFlags & PPCII::PPC970_First;
   isSingle  = TSFlags & PPCII::PPC970_Single;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 511bb223cada..2176c02c8503 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -610,6 +610,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   unsigned Imm;
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+  bool isPPC64 = (PtrVT == MVT::i64);
+
   if (isInt32Immediate(N->getOperand(1), Imm)) {
     // We can codegen setcc op, imm very efficiently compared to a brcond.
     // Check for those cases here.
@@ -624,6 +627,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
         return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
       }
       case ISD::SETNE: {
+        if (isPPC64) break;
         SDValue AD =
           SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                          Op, getI32Imm(~0U)), 0);
@@ -647,6 +651,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       switch (CC) {
       default: break;
       case ISD::SETEQ:
+        if (isPPC64) break;
         Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                             Op, getI32Imm(1)), 0);
         return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
@@ -655,6 +660,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
                                                              getI32Imm(0)), 0),
                                       Op.getValue(1));
       case ISD::SETNE: {
+        if (isPPC64) break;
         Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
         SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                             Op, getI32Imm(~0U));
@@ -996,22 +1002,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
   }
   case ISD::SELECT_CC: {
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+    EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+    bool isPPC64 = (PtrVT == MVT::i64);
 
     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
-    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
-      if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
-        if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
-          if (N1C->isNullValue() && N3C->isNullValue() &&
-              N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
-              // FIXME: Implement this optzn for PPC64.
-              N->getValueType(0) == MVT::i32) {
-            SDNode *Tmp =
-              CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
-                                     N->getOperand(0), getI32Imm(~0U));
-            return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
-                                        SDValue(Tmp, 0), N->getOperand(0),
-                                        SDValue(Tmp, 1));
-          }
+    if (!isPPC64)
+      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+            if (N1C->isNullValue() && N3C->isNullValue() &&
+                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+                // FIXME: Implement this optzn for PPC64.
+                N->getValueType(0) == MVT::i32) {
+              SDNode *Tmp =
+                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+                                       N->getOperand(0), getI32Imm(~0U));
+              return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+                                          SDValue(Tmp, 0), N->getOperand(0),
+                                          SDValue(Tmp, 1));
+            }
 
     SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
     unsigned BROpc = getPredicateForSetCC(CC);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index dbb184c1f6bb..9741a3902af7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -125,10 +125,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::FCOS , MVT::f64, Expand);
   setOperationAction(ISD::FREM , MVT::f64, Expand);
   setOperationAction(ISD::FPOW , MVT::f64, Expand);
+  setOperationAction(ISD::FMA  , MVT::f64, Expand);
   setOperationAction(ISD::FSIN , MVT::f32, Expand);
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
   setOperationAction(ISD::FREM , MVT::f32, Expand);
   setOperationAction(ISD::FPOW , MVT::f32, Expand);
+  setOperationAction(ISD::FMA  , MVT::f32, Expand);
 
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 
@@ -215,10 +217,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 
   // VAARG is custom lowered with the 32-bit SVR4 ABI.
-  if (    TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
-      && !TM.getSubtarget<PPCSubtarget>().isPPC64())
+  if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+      && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
     setOperationAction(ISD::VAARG, MVT::Other, Custom);
-  else
+    setOperationAction(ISD::VAARG, MVT::i64, Custom);
+  } else
     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 
   // Use the default implementation.
@@ -1262,9 +1265,107 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget) const {
+  SDNode *Node = Op.getNode();
+  EVT VT = Node->getValueType(0);
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  SDValue InChain = Node->getOperand(0);
+  SDValue VAListPtr = Node->getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+  DebugLoc dl = Node->getDebugLoc();
+
+  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+  // gpr_index
+  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
+                                    false, false, 0);
+  InChain = GprIndex.getValue(1);
+
+  if (VT == MVT::i64) {
+    // Check if GprIndex is even
+    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+                                 DAG.getConstant(1, MVT::i32));
+    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
+    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+                                          DAG.getConstant(1, MVT::i32));
+    // Align GprIndex to be even if it isn't
+    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+                           GprIndex);
+  }
+
+  // fpr index is 1 byte after gpr
+  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+                               DAG.getConstant(1, MVT::i32));
+
+  // fpr
+  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
+                                    false, false, 0);
+  InChain = FprIndex.getValue(1);
+
+  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+                                       DAG.getConstant(8, MVT::i32));
+
+  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+                                        DAG.getConstant(4, MVT::i32));
 
-  llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
-  return SDValue(); // Not reached
+  // areas
+  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+                                     MachinePointerInfo(), false, false, 0);
+  InChain = OverflowArea.getValue(1);
+
+  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+                                    MachinePointerInfo(), false, false, 0);
+  InChain = RegSaveArea.getValue(1);
+
+  // use reg_save_area while index < 8, overflow_area once index >= 8
+  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+                            DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+  // adjustment constant gpr_index * 4/8
+  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+                                    VT.isInteger() ? GprIndex : FprIndex,
+                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
+                                                    MVT::i32));
+
+  // OurReg = RegSaveArea + RegConstant
+  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+                               RegConstant);
+
+  // Floating types are 32 bytes into RegSaveArea
+  if (VT.isFloatingPoint())
+    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+                         DAG.getConstant(32, MVT::i32));
+
+  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   VT.isInteger() ? GprIndex : FprIndex,
+                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+                                                   MVT::i32));
+
+  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+                              VT.isInteger() ? VAListPtr : FprPtr,
+                              MachinePointerInfo(SV),
+                              MVT::i8, false, false, 0);
+
+  // determine if we should load from reg_save_area or overflow_area
+  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+  // increase overflow_area by 4/8 if gpr/fpr index >= 8
+  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
+                                          MVT::i32));
+
+  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+                             OverflowAreaPlusN);
+
+  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+                              OverflowAreaPtr,
+                              MachinePointerInfo(),
+                              MVT::i32, false, false, 0);
+
+  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
@@ -1870,7 +1971,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
       InVals.push_back(FIN);
       if (ObjSize==1 || ObjSize==2) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg;
+          if (isPPC64)
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          else
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                             MachinePointerInfo(),
@@ -1889,7 +1994,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
         // to memory.  ArgVal will be address of the beginning of
         // the object.
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+          unsigned VReg;
+          if (isPPC64)
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+          else
+            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2902,6 +3011,12 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOpChains[0], MemOpChains.size());
 
+  // Set CR bit 6 (CR1EQ) to true if this is a vararg call.
+  if (isVarArg) {
+    SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+    RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+  }
+
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into the appropriate regs.
   SDValue InFlag;
@@ -2911,13 +3026,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
     InFlag = Chain.getValue(1);
   }
 
-  // Set CR6 to true if this is a vararg call.
-  if (isVarArg) {
-    SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
   if (isTailCall)
     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
                     false, TailCallArguments);
@@ -4422,11 +4530,27 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue>&Results,
                                            SelectionDAG &DAG) const {
+  const TargetMachine &TM = getTargetMachine();
   DebugLoc dl = N->getDebugLoc();
   switch (N->getOpcode()) {
   default:
     assert(false && "Do not know how to custom type legalize this operation!");
     return;
+  case ISD::VAARG: {
+    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+        || TM.getSubtarget<PPCSubtarget>().isPPC64())
+      return;
+
+    EVT VT = N->getValueType(0);
+
+    if (VT == MVT::i64) {
+      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+      Results.push_back(NewNode);
+      Results.push_back(NewNode.getValue(1));
+    }
+    return;
+  }
   case ISD::FP_ROUND_INREG: {
     assert(N->getValueType(0) == MVT::ppcf128);
     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
@@ -4676,7 +4800,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
     .addReg(TmpReg).addReg(MaskReg);
   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
     .addReg(Tmp3Reg).addReg(Tmp2Reg);
-  BuildMI(BB, dl, TII->get(PPC::STWCX))
+  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
   BuildMI(BB, dl, TII->get(PPC::BCC))
     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 53b049135e24..143444fdc22b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,22 +12,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCInstrInfo.h"
+#include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCPredicates.h"
-#include "PPCGenInstrInfo.inc"
 #include "PPCTargetMachine.h"
 #include "PPCHazardRecognizers.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PPCGenInstrInfo.inc"
 
 namespace llvm {
 extern cl::opt<bool> EnablePPC32RS;  // FIXME (64-bit): See PPCRegisterInfo.cpp.
@@ -37,8 +41,8 @@ extern cl::opt<bool> EnablePPC64RS;  // FIXME (64-bit): See PPCRegisterInfo.cpp.
 using namespace llvm;
 
 PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
-  : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
-    RI(*TM.getSubtargetImpl(), *this) {}
+  : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+    TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
 
 /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
 /// this target when scheduling the DAG.
@@ -120,7 +124,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   // destination register as well.
   if (Reg0 == Reg1) {
     // Must be two address instruction!
-    assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+    assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
            "Expecting a two-address instruction!");
     Reg2IsKill = false;
     ChangeReg0 = true;
@@ -315,12 +319,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else
     llvm_unreachable("Impossible reg-to-reg copy");
 
-  const TargetInstrDesc &TID = get(Opc);
-  if (TID.getNumOperands() == 3)
-    BuildMI(MBB, I, DL, TID, DestReg)
+  const MCInstrDesc &MCID = get(Opc);
+  if (MCID.getNumOperands() == 3)
+    BuildMI(MBB, I, DL, MCID, DestReg)
       .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
   else
-    BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+    BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
 }
 
 bool
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index b5249ae03769..90bacc96c87e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -18,6 +18,9 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "PPCRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
 namespace llvm {
 
 /// PPCII - This namespace holds all of the PowerPC target-specific
@@ -61,7 +64,7 @@ enum PPC970_Unit {
 } // end namespace PPCII
 
 
-class PPCInstrInfo : public TargetInstrInfoImpl {
+class PPCInstrInfo : public PPCGenInstrInfo {
   PPCTargetMachine &TM;
   const PPCRegisterInfo RI;
 
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 78383e0603bd..4590f0045641 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -87,7 +87,7 @@ asm(
     // FIXME: could shrink frame
     // Set up a proper stack frame
     // FIXME Layout
-    //   PowerPC64 ABI linkage    -  24 bytes
+    //   PowerPC32 ABI linkage    -  24 bytes
     //                 parameters -  32 bytes
     //   13 double registers      - 104 bytes
     //   8 int registers          -  32 bytes
@@ -205,11 +205,27 @@ void PPC32CompilationCallback() {
 
 #if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
     defined(__ppc64__)
+#ifdef __ELF__
+asm(
+    ".text\n"
+    ".align 2\n"
+    ".globl PPC64CompilationCallback\n"
+    ".section \".opd\",\"aw\"\n"
+    ".align 3\n"
+"PPC64CompilationCallback:\n"
+    ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+    ".size PPC64CompilationCallback,24\n"
+    ".previous\n"
+    ".align 4\n"
+    ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
 asm(
     ".text\n"
     ".align 2\n"
     ".globl _PPC64CompilationCallback\n"
 "_PPC64CompilationCallback:\n"
+#endif
     // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the 
     // FIXME: need to save v[0-19] for altivec?
     // Set up a proper stack frame
@@ -218,49 +234,55 @@ asm(
     //                 parameters -  64 bytes
     //   13 double registers      - 104 bytes
     //   8 int registers          -  64 bytes
-    "mflr r0\n"
-    "std r0,  16(r1)\n"
-    "stdu r1, -280(r1)\n"
+    "mflr 0\n"
+    "std  0,  16(1)\n"
+    "stdu 1, -280(1)\n"
     // Save all int arg registers
-    "std r10, 272(r1)\n"    "std r9,  264(r1)\n"
-    "std r8,  256(r1)\n"    "std r7,  248(r1)\n"
-    "std r6,  240(r1)\n"    "std r5,  232(r1)\n"
-    "std r4,  224(r1)\n"    "std r3,  216(r1)\n"
+    "std 10, 272(1)\n"    "std 9,  264(1)\n"
+    "std 8,  256(1)\n"    "std 7,  248(1)\n"
+    "std 6,  240(1)\n"    "std 5,  232(1)\n"
+    "std 4,  224(1)\n"    "std 3,  216(1)\n"
     // Save all call-clobbered FP regs.
-    "stfd f13, 208(r1)\n"    "stfd f12, 200(r1)\n"
-    "stfd f11, 192(r1)\n"    "stfd f10, 184(r1)\n"
-    "stfd f9,  176(r1)\n"    "stfd f8,  168(r1)\n"
-    "stfd f7,  160(r1)\n"    "stfd f6,  152(r1)\n"
-    "stfd f5,  144(r1)\n"    "stfd f4,  136(r1)\n"
-    "stfd f3,  128(r1)\n"    "stfd f2,  120(r1)\n"
-    "stfd f1,  112(r1)\n"
+    "stfd 13, 208(1)\n"    "stfd 12, 200(1)\n"
+    "stfd 11, 192(1)\n"    "stfd 10, 184(1)\n"
+    "stfd 9,  176(1)\n"    "stfd 8,  168(1)\n"
+    "stfd 7,  160(1)\n"    "stfd 6,  152(1)\n"
+    "stfd 5,  144(1)\n"    "stfd 4,  136(1)\n"
+    "stfd 3,  128(1)\n"    "stfd 2,  120(1)\n"
+    "stfd 1,  112(1)\n"
     // Arguments to Compilation Callback:
     // r3 - our lr (address of the call instruction in stub plus 4)
     // r4 - stub's lr (address of instruction that called the stub plus 4)
     // r5 - is64Bit - always 1.
-    "mr   r3, r0\n"
-    "ld   r2, 280(r1)\n" // stub's frame
-    "ld   r4, 16(r2)\n"  // stub's lr
-    "li   r5, 1\n"       // 1 == 64 bit
+    "mr   3, 0\n"      // return address (still in r0)
+    "ld   5, 280(1)\n" // stub's frame
+    "ld   4, 16(5)\n"  // stub's lr
+    "li   5, 1\n"      // 1 == 64 bit
+#ifdef __ELF__
+    "bl PPCCompilationCallbackC\n"
+    "nop\n"
+#else
     "bl _PPCCompilationCallbackC\n"
-    "mtctr r3\n"
+#endif
+    "mtctr 3\n"
     // Restore all int arg registers
-    "ld r10, 272(r1)\n"    "ld r9,  264(r1)\n"
-    "ld r8,  256(r1)\n"    "ld r7,  248(r1)\n"
-    "ld r6,  240(r1)\n"    "ld r5,  232(r1)\n"
-    "ld r4,  224(r1)\n"    "ld r3,  216(r1)\n"
+    "ld 10, 272(1)\n"    "ld 9,  264(1)\n"
+    "ld 8,  256(1)\n"    "ld 7,  248(1)\n"
+    "ld 6,  240(1)\n"    "ld 5,  232(1)\n"
+    "ld 4,  224(1)\n"    "ld 3,  216(1)\n"
     // Restore all FP arg registers
-    "lfd f13, 208(r1)\n"    "lfd f12, 200(r1)\n"
-    "lfd f11, 192(r1)\n"    "lfd f10, 184(r1)\n"
-    "lfd f9,  176(r1)\n"    "lfd f8,  168(r1)\n"
-    "lfd f7,  160(r1)\n"    "lfd f6,  152(r1)\n"
-    "lfd f5,  144(r1)\n"    "lfd f4,  136(r1)\n"
-    "lfd f3,  128(r1)\n"    "lfd f2,  120(r1)\n"
-    "lfd f1,  112(r1)\n"
+    "lfd 13, 208(1)\n"    "lfd 12, 200(1)\n"
+    "lfd 11, 192(1)\n"    "lfd 10, 184(1)\n"
+    "lfd 9,  176(1)\n"    "lfd 8,  168(1)\n"
+    "lfd 7,  160(1)\n"    "lfd 6,  152(1)\n"
+    "lfd 5,  144(1)\n"    "lfd 4,  136(1)\n"
+    "lfd 3,  128(1)\n"    "lfd 2,  120(1)\n"
+    "lfd 1,  112(1)\n"
     // Pop 3 frames off the stack and branch to target
-    "ld  r1, 280(r1)\n"
-    "ld  r2, 16(r1)\n"
-    "mtlr r2\n"
+    "ld  1, 280(1)\n"
+    "ld  0, 16(1)\n"
+    "mtlr 0\n"
+    // XXX: any special TOC handling in the ELF case for JIT?
     "bctr\n"
     );
 #else
diff --git a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
index 65c2c82c51a7..cf73d861fa4d 100644
--- a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
@@ -28,12 +28,10 @@ namespace {
 class PPCMCCodeEmitter : public MCCodeEmitter {
   PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
   void operator=(const PPCMCCodeEmitter &);   // DO NOT IMPLEMENT
-  const TargetMachine &TM;
-  MCContext &Ctx;
   
 public:
-  PPCMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
-    : TM(tm), Ctx(ctx) {
+  PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+                   MCContext &ctx) {
   }
   
   ~PPCMCCodeEmitter() {}
@@ -79,9 +77,10 @@ public:
   
 } // end anonymous namespace
   
-MCCodeEmitter *llvm::createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+                                            const MCSubtargetInfo &STI,
                                             MCContext &Ctx) {
-  return new PPCMCCodeEmitter(TM, Ctx);
+  return new PPCMCCodeEmitter(MCII, STI, Ctx);
 }
 
 unsigned PPCMCCodeEmitter::
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3374e9b0b631..9c2428b92e65 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -44,6 +44,9 @@
 #include "llvm/ADT/STLExtras.h"
 #include <cstdlib>
 
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
 // FIXME (64-bit): Eventually enable by default.
 namespace llvm {
 cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
@@ -110,8 +113,7 @@ unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
 
 PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
                                  const TargetInstrInfo &tii)
-  : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
-    Subtarget(ST), TII(tii) {
+  : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) {
   ImmToIdxMap[PPC::LD]   = PPC::LDX;    ImmToIdxMap[PPC::STD]  = PPC::STDX;
   ImmToIdxMap[PPC::LBZ]  = PPC::LBZX;   ImmToIdxMap[PPC::STB]  = PPC::STBX;
   ImmToIdxMap[PPC::LHZ]  = PPC::LHZX;   ImmToIdxMap[PPC::LHA]  = PPC::LHAX;
@@ -504,6 +506,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
   const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
   unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
   unsigned SrcReg = MI.getOperand(0).getReg();
+  bool LP64 = Subtarget.isPPC64();
 
   // We need to store the CR in the low 4-bits of the saved value. First, issue
   // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
@@ -520,7 +523,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
       .addImm(0)
       .addImm(31);
 
-  addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+  addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
                     .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
                     FrameIndex);
 
@@ -709,5 +712,3 @@ int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
 
   return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour);
 }
-
-#include "PPCGenRegisterInfo.inc"
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 48c25625ea9b..33fe5ebcf4cd 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -16,9 +16,11 @@
 #define POWERPC32_REGISTERINFO_H
 
 #include "PPC.h"
-#include "PPCGenRegisterInfo.h.inc"
 #include <map>
 
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
 namespace llvm {
 class PPCSubtarget;
 class TargetInstrInfo;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 3c0190199a82..1acdf4eb853b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -276,15 +276,13 @@ def RM: SPR<512, "**ROUNDING MODE**">;
 /// Register classes
 // Allocate volatiles first
 // then nonvolatiles in reverse order since stmw/lmw save from rN to r31
-def GPRC : RegisterClass<"PPC", [i32], 32,
-     [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12,
-      R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17,
-      R16, R15, R14, R13, R31, R0, R1, LR]>;
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+                                                (sequence "R%u", 30, 13),
+                                                R31, R0, R1, LR)>;
 
-def G8RC : RegisterClass<"PPC", [i64], 64,
-     [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12,
-      X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17,
-      X16, X15, X14, X31, X13, X0, X1, LR8]>;
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+                                                (sequence "X%u", 30, 14),
+                                                X31, X13, X0, X1, LR8)>;
 
 // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
 // ABI the size of the Floating-point register save area is determined by the
@@ -293,41 +291,36 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
 // previous stack frame. By allocating non-volatiles in reverse order we make
 // sure that the Floating-point register save area is always as small as
 // possible because there aren't any unused spill slots.
-def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
-  F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
-  F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
-def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
-  F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
-  F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+                                                (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
 
 def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
- [V2, V3, V4, V5, V0, V1, 
-  V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
-  V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>;
+                         (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+                             V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+                             V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
 
 def CRBITRC : RegisterClass<"PPC", [i32], 32,
-  [CR0LT, CR0GT, CR0EQ, CR0UN,
-   CR1LT, CR1GT, CR1EQ, CR1UN,
-   CR2LT, CR2GT, CR2EQ, CR2UN,
-   CR3LT, CR3GT, CR3EQ, CR3UN,
-   CR4LT, CR4GT, CR4EQ, CR4UN,
-   CR5LT, CR5GT, CR5EQ, CR5UN,
-   CR6LT, CR6GT, CR6EQ, CR6UN,
-   CR7LT, CR7GT, CR7EQ, CR7UN
-  ]>
+  (add CR0LT, CR0GT, CR0EQ, CR0UN,
+       CR1LT, CR1GT, CR1EQ, CR1UN,
+       CR2LT, CR2GT, CR2EQ, CR2UN,
+       CR3LT, CR3GT, CR3EQ, CR3UN,
+       CR4LT, CR4GT, CR4EQ, CR4UN,
+       CR5LT, CR5GT, CR5EQ, CR5UN,
+       CR6LT, CR6GT, CR6EQ, CR6UN,
+       CR7LT, CR7GT, CR7EQ, CR7UN)>
 {
   let CopyCost = -1;
 }
 
-def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, 
-  CR3, CR4]>
-{
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+                                                CR7, CR2, CR3, CR4)> {
   let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
 }
 
-def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
-def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
-def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>;
-def CARRYRC : RegisterClass<"PPC", [i32], 32, [CARRY]> {
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
   let CopyCost = -1;
 }
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 5f3aa2328f9e..5ea9b0f6596c 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the PPC specific subclass of TargetSubtarget.
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,8 +15,13 @@
 #include "PPC.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Target/TargetMachine.h"
-#include "PPCGenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
 #include <cstdlib>
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
 using namespace llvm;
 
 #if defined(__APPLE__)
@@ -57,9 +62,10 @@ static const char *GetCurrentPowerPCCPU() {
 #endif
 
 
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
-                           bool is64Bit)
-  : StackAlignment(16)
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS, bool is64Bit)
+  : PPCGenSubtargetInfo(TT, CPU, FS)
+  , StackAlignment(16)
   , DarwinDirective(PPC::DIR_NONE)
   , IsGigaProcessor(false)
   , Has64BitSupport(false)
@@ -73,13 +79,19 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
   , TargetTriple(TT) {
 
   // Determine default and user specified characteristics
-  std::string CPU = "generic";
+  std::string CPUName = CPU;
+  if (CPUName.empty())
+    CPUName = "generic";
 #if defined(__APPLE__)
-  CPU = GetCurrentPowerPCCPU();
+  if (CPUName == "generic")
+    CPUName = GetCurrentPowerPCCPU();
 #endif
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUName);
 
   // If we are generating code for ppc64, verify that options make sense.
   if (is64Bit) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 8fd1a447692d..e028de6b09de 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -7,23 +7,26 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the PowerPC specific subclass of TargetSubtarget.
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef POWERPCSUBTARGET_H
 #define POWERPCSUBTARGET_H
 
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC
 
 namespace llvm {
+class StringRef;
 
 namespace PPC {
   // -m directive values.
@@ -43,7 +46,7 @@ namespace PPC {
 class GlobalValue;
 class TargetMachine;
   
-class PPCSubtarget : public TargetSubtarget {
+class PPCSubtarget : public PPCGenSubtargetInfo {
 protected:
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
@@ -73,13 +76,12 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  PPCSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+  PPCSubtarget(const std::string &TT, const std::string &CPU,
+               const std::string &FS, bool is64Bit);
   
   /// ParseSubtargetFeatures - Parses features string setting specified 
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
-
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   
   /// SetJITMode - This is called to inform the subtarget info that we are
   /// producing code for the JIT.
@@ -104,7 +106,7 @@ public:
     // Note, the alignment values for f64 and i64 on ppc64 in Darwin
     // documentation are wrong; these are correct (i.e. "what gcc does").
     return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
-                     : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32";
+                     : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
   }
 
   /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index d27e54e56699..e0ea5adba751 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
-#include "PPCMCAsmInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/MC/MCStreamer.h"
@@ -21,15 +20,6 @@
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
 
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
-  Triple TheTriple(TT);
-  bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
-  if (TheTriple.isOSDarwin())
-    return new PPCMCAsmInfoDarwin(isPPC64);
-  return new PPCLinuxMCAsmInfo(isPPC64);
-  
-}
-
 // This is duplicated code. Refactor this.
 static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCContext &Ctx, TargetAsmBackend &TAB,
@@ -48,9 +38,6 @@ extern "C" void LLVMInitializePowerPCTarget() {
   RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);  
   RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
   
-  RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
-  RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
-  
   // Register the MC Code Emitter
   TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
   TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
@@ -67,9 +54,10 @@ extern "C" void LLVMInitializePowerPCTarget() {
 
 
 PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
+                                   const std::string &CPU,
                                    const std::string &FS, bool is64Bit)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS, is64Bit),
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS, is64Bit),
     DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
     FrameLowering(Subtarget), JITInfo(*this, is64Bit),
     TLInfo(*this), TSInfo(*this),
@@ -88,14 +76,16 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
 bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
 
 PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT, 
+                                       const std::string &CPU,
                                        const std::string &FS) 
-  : PPCTargetMachine(T, TT, FS, false) {
+  : PPCTargetMachine(T, TT, CPU, FS, false) {
 }
 
 
 PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT, 
+                                       const std::string &CPU, 
                                        const std::string &FS)
-  : PPCTargetMachine(T, TT, FS, true) {
+  : PPCTargetMachine(T, TT, CPU, FS, true) {
 }
 
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 2d2498943a2d..baf07e3498f8 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,7 +41,8 @@ class PPCTargetMachine : public LLVMTargetMachine {
 
 public:
   PPCTargetMachine(const Target &T, const std::string &TT,
-                   const std::string &FS, bool is64Bit);
+                   const std::string &CPU, const std::string &FS,
+                   bool is64Bit);
 
   virtual const PPCInstrInfo      *getInstrInfo() const { return &InstrInfo; }
   virtual const PPCFrameLowering  *getFrameLowering() const {
@@ -77,7 +78,7 @@ public:
 class PPC32TargetMachine : public PPCTargetMachine {
 public:
   PPC32TargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &CPU, const std::string &FS);
 };
 
 /// PPC64TargetMachine - PowerPC 64-bit target machine.
@@ -85,7 +86,7 @@ public:
 class PPC64TargetMachine : public PPCTargetMachine {
 public:
   PPC64TargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &CPU, const std::string &FS);
 };
 
 } // end namespace llvm
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index fcec368a213b..4cc95340890d 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -870,11 +870,6 @@ rshift_gt (unsigned int a)
    bar ();
 }
 
-void neg_eq_cst(unsigned int a) {
-if (-a == 123)
-bar();
-}
-
 All should simplify to a single comparison.  All of these are
 currently not optimized with "clang -emit-llvm-bc | opt
 -std-compile-opts".
@@ -1767,7 +1762,6 @@ case it choses instead to keep the max operation obvious.
 
 //===---------------------------------------------------------------------===//
 
-Switch lowering generates less than ideal code for the following switch:
 define void @a(i32 %x) nounwind {
 entry:
   switch i32 %x, label %if.end [
@@ -1788,19 +1782,15 @@ declare void @foo()
 Generated code on x86-64 (other platforms give similar results):
 a:
 	cmpl	$5, %edi
-	ja	.LBB0_2
-	movl	%edi, %eax
-	movl	$47, %ecx
-	btq	%rax, %rcx
-	jb	.LBB0_3
+	ja	.LBB0_2
+	cmpl	$4, %edi
+	jne	.LBB0_3
 .LBB0_2:
 	ret
 .LBB0_3:
 	jmp	foo  # TAILCALL
 
-The movl+movl+btq+jb could be simplified to a cmpl+jne.
-
-Or, if we wanted to be really clever, we could simplify the whole thing to
+If we wanted to be really clever, we could simplify the whole thing to
 something like the following, which eliminates a branch:
 	xorl    $1, %edi
 	cmpl	$4, %edi
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 6839234a4700..c77ded4b435e 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -1,13 +1,10 @@
 set(LLVM_TARGET_DEFINITIONS Sparc.td)
 
-tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(SparcGenRegisterNames.inc -gen-register-enums)
-tablegen(SparcGenRegisterInfo.inc -gen-register-desc)
-tablegen(SparcGenInstrNames.inc -gen-instr-enums)
-tablegen(SparcGenInstrInfo.inc -gen-instr-desc)
+tablegen(SparcGenRegisterInfo.inc -gen-register-info)
+tablegen(SparcGenInstrInfo.inc -gen-instr-info)
 tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
 tablegen(SparcGenDAGISel.inc -gen-dag-isel)
-tablegen(SparcGenSubtarget.inc -gen-subtarget)
+tablegen(SparcGenSubtargetInfo.inc -gen-subtarget)
 tablegen(SparcGenCallingConv.inc -gen-callingconv)
 
 add_llvm_target(SparcCodeGen
@@ -18,7 +15,6 @@ add_llvm_target(SparcCodeGen
   SparcISelDAGToDAG.cpp
   SparcISelLowering.cpp
   SparcFrameLowering.cpp
-  SparcMCAsmInfo.cpp
   SparcRegisterInfo.cpp
   SparcSubtarget.cpp
   SparcTargetMachine.cpp
@@ -26,3 +22,4 @@ add_llvm_target(SparcCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 4b12852ef873..dab35e5e4e6f 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -298,7 +298,7 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
     return false;
   if (candidate->getOpcode() == SP::UNIMP)
     return true;
-  const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
+  const MCInstrDesc &prevdesc = (--candidate)->getDesc();
   return prevdesc.hasDelaySlot();
 }
 
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..1e8c02979887
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMSparcDesc
+  SparcMCTargetDesc.cpp
+  SparcMCAsmInfo.cpp
+  )
diff --git a/lib/Target/Sparc/MCTargetDesc/Makefile b/lib/Target/Sparc/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..abcbe2da18ec
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Sparc/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index d37d6d231305..6a7e0902354e 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -12,9 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
 using namespace llvm;
 
 SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+  IsLittleEndian = false;
+  Triple TheTriple(TT);
+  if (TheTriple.getArch() == Triple::sparcv9)
+    PointerSize = 8;
+
   Data16bitsDirective = "\t.half\t";
   Data32bitsDirective = "\t.word\t";
   Data64bitsDirective = 0;  // .xword is only supported by V9.
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 0cb6827d2771..0cb6827d2771 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
new file mode 100644
index 000000000000..cb92a2bfd417
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCTargetDesc.h"
+#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SparcGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSparcMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitSparcMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeSparcMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                   StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitSparcMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeSparcMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+                                          createSparcMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSparcMCAsmInfo() {
+  RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
+  RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
new file mode 100644
index 000000000000..2fd9e3f4cbd3
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCMCTARGETDESC_H
+#define SPARCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSparcTarget;
+extern Target TheSparcV9Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for Sparc registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SparcGenRegisterInfo.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SparcGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index 27942c56fb3a..4b81ada956f2 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -12,12 +12,11 @@ LIBRARYNAME = LLVMSparcCodeGen
 TARGET = Sparc
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
-                SparcGenRegisterInfo.inc SparcGenInstrNames.inc \
-                SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
-                SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc
+BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \
+		SparcGenAsmWriter.inc SparcGenDAGISel.inc \
+		SparcGenSubtargetInfo.inc SparcGenCallingConv.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index a37920d80308..7b2c6141dbf8 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_SPARC_H
 #define TARGET_SPARC_H
 
+#include "MCTargetDesc/SparcMCTargetDesc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
@@ -28,21 +29,8 @@ namespace llvm {
   FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
   FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
 
-  extern Target TheSparcTarget;
-  extern Target TheSparcV9Target;
-
 } // end namespace llvm;
 
-// Defines symbolic names for Sparc registers.  This defines a mapping from
-// register name to register number.
-//
-#include "SparcGenRegisterNames.inc"
-
-// Defines symbolic names for the Sparc instructions.
-//
-#include "SparcGenInstrNames.inc"
-
-
 namespace llvm {
   // Enums corresponding to Sparc condition codes, both icc's and fcc's.  These
   // values must be kept in sync with the ones in the .td file.
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 0b4612df4e43..6f30d3fd6c35 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,4 +1,3 @@
-
 //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -755,9 +754,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
   setOperationAction(ISD::FCOS , MVT::f64, Expand);
   setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FMA  , MVT::f64, Expand);
   setOperationAction(ISD::FSIN , MVT::f32, Expand);
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
   setOperationAction(ISD::FREM , MVT::f32, Expand);
+  setOperationAction(ISD::FMA  , MVT::f32, Expand);
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
   setOperationAction(ISD::CTTZ , MVT::i32, Expand);
   setOperationAction(ISD::CTLZ , MVT::i32, Expand);
@@ -1265,26 +1266,6 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-std::vector<unsigned> SparcTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const {
-  if (Constraint.size() != 1)
-    return std::vector<unsigned>();
-
-  switch (Constraint[0]) {
-  default: break;
-  case 'r':
-    return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3,
-                                 SP::L4, SP::L5, SP::L6, SP::L7,
-                                 SP::I0, SP::I1, SP::I2, SP::I3,
-                                 SP::I4, SP::I5,
-                                 SP::O0, SP::O1, SP::O2, SP::O3,
-                                 SP::O4, SP::O5, SP::O7, 0);
-  }
-
-  return std::vector<unsigned>();
-}
-
 bool
 SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The Sparc target isn't yet aware of offsets.
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 9ea6e16e3ac1..8a1886a856e0 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -65,9 +65,6 @@ namespace llvm {
     ConstraintType getConstraintType(const std::string &Constraint) const;
     std::pair<unsigned, const TargetRegisterClass*>
     getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                      EVT VT) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index afa3c1f88f96..4e3ddf839985 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -12,19 +12,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcInstrInfo.h"
-#include "SparcSubtarget.h"
 #include "Sparc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_INSTRINFO_CTOR
 #include "SparcGenInstrInfo.inc"
-#include "SparcMachineFunctionInfo.h"
+
 using namespace llvm;
 
 SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
-  : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)),
+  : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
     RI(ST, *this), Subtarget(ST) {
 }
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index b2d24f52503b..eda64efb7a03 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -17,6 +17,9 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "SparcRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "SparcGenInstrInfo.inc"
+
 namespace llvm {
 
 /// SPII - This namespace holds all of the target specific flags that
@@ -31,7 +34,7 @@ namespace SPII {
   };
 }
 
-class SparcInstrInfo : public TargetInstrInfoImpl {
+class SparcInstrInfo : public SparcGenInstrInfo {
   const SparcRegisterInfo RI;
   const SparcSubtarget& Subtarget;
 public:
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 9fcf028fa60e..0acdd2c55d6b 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -23,12 +23,15 @@
 #include "llvm/Type.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SparcGenRegisterInfo.inc"
+
 using namespace llvm;
 
 SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
                                      const TargetInstrInfo &tii)
-  : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
-    Subtarget(st), TII(tii) {
+  : SparcGenRegisterInfo(), Subtarget(st), TII(tii) {
 }
 
 const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -135,6 +138,3 @@ int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
 int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
   return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
 }
-
-#include "SparcGenRegisterInfo.inc"
-
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 56c8068228f0..ec9e63a686bc 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -15,7 +15,9 @@
 #define SPARCREGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "SparcGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "SparcGenRegisterInfo.inc"
 
 namespace llvm {
 
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 0729818e85b8..cf928293c169 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -139,23 +139,21 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
 // FIXME: the register order should be defined in terms of the preferred
 // allocation order...
 //
-def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7,
-                                     I0, I1, I2, I3, I4, I5,
-                                     O0, O1, O2, O3, O4, O5, O7,
-                                     G1,
-                                     // Non-allocatable regs:
-                                     G2, G3, G4, // FIXME: OK for use only in
-                                                 // applications, not libraries.
-                                     O6, // stack ptr
-                                     I6, // frame ptr
-                                     I7, // return address
-                                     G0, // constant zero
-                                     G5, G6, G7 // reserved for kernel
-                                     ]>;
+def IntRegs : RegisterClass<"SP", [i32], 32,
+                            (add L0, L1, L2, L3, L4, L5, L6,
+                                 L7, I0, I1, I2, I3, I4, I5,
+                                 O0, O1, O2, O3, O4, O5, O7,
+                                 G1,
+                                 // Non-allocatable regs:
+                                 G2, G3, G4, // FIXME: OK for use only in
+                                             // applications, not libraries.
+                                 O6, // stack ptr
+                                 I6, // frame ptr
+                                 I7, // return address
+                                 G0, // constant zero
+                                 G5, G6, G7 // reserved for kernel
+                                 )>;
 
-def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8,
-  F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22,
-  F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
 
-def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
-  D8, D9, D10, D11, D12, D13, D14, D15]>;
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index ce11af1fa842..de647e8221a2 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -7,28 +7,38 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the SPARC specific subclass of TargetSubtarget.
+// This file implements the SPARC specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "SparcSubtarget.h"
-#include "SparcGenSubtarget.inc"
+#include "Sparc.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SparcGenSubtargetInfo.inc"
+
 using namespace llvm;
 
-SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS, 
-                               bool is64Bit) :
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+                               const std::string &FS,  bool is64Bit) :
+  SparcGenSubtargetInfo(TT, CPU, FS),
   IsV9(false),
   V8DeprecatedInsts(false),
   IsVIS(false),
   Is64Bit(is64Bit) {
   
   // Determine default and user specified characteristics
-  const char *CPU = "v8";
-  if (is64Bit) {
-    CPU = "v9";
-    IsV9 = true;
+  std::string CPUName = CPU;
+  if (CPUName.empty()) {
+    if (is64Bit)
+      CPUName = "v9";
+    else
+      CPUName = "v8";
   }
+  IsV9 = CPUName == "v9";
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);
 }
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index cec0ab422bc2..00a04c3bea57 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -7,26 +7,31 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the SPARC specific subclass of TargetSubtarget.
+// This file declares the SPARC specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef SPARC_SUBTARGET_H
 #define SPARC_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "SparcGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-class SparcSubtarget : public TargetSubtarget {
+class SparcSubtarget : public SparcGenSubtargetInfo {
   bool IsV9;
   bool V8DeprecatedInsts;
   bool IsVIS;
   bool Is64Bit;
   
 public:
-  SparcSubtarget(const std::string &TT, const std::string &FS, bool is64bit);
+  SparcSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS, bool is64bit);
 
   bool isV9() const { return IsV9; }
   bool isVIS() const { return IsVIS; }
@@ -34,8 +39,7 @@ public:
   
   /// ParseSubtargetFeatures - Parses features string setting specified 
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
   
   bool is64Bit() const { return Is64Bit; }
   std::string getDataLayout() const {
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index b84eab568d29..cbe6d8754efd 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Sparc.h"
-#include "SparcMCAsmInfo.h"
 #include "SparcTargetMachine.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -21,18 +20,15 @@ extern "C" void LLVMInitializeSparcTarget() {
   // Register the target.
   RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
   RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
-
-  RegisterAsmInfo<SparcELFMCAsmInfo> A(TheSparcTarget);
-  RegisterAsmInfo<SparcELFMCAsmInfo> B(TheSparcV9Target);
-
 }
 
 /// SparcTargetMachine ctor - Create an ILP32 architecture model
 ///
 SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, 
+                                       const std::string &CPU,
                                        const std::string &FS, bool is64bit)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS, is64bit),
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS, is64bit),
     DataLayout(Subtarget.getDataLayout()),
     TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
     FrameLowering(Subtarget) {
@@ -56,12 +52,14 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
 
 SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
                                            const std::string &TT, 
+                                           const std::string &CPU,
                                            const std::string &FS)
-  : SparcTargetMachine(T, TT, FS, false) {
+  : SparcTargetMachine(T, TT, CPU, FS, false) {
 }
 
 SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, 
                                            const std::string &TT, 
+                                           const std::string &CPU,
                                            const std::string &FS)
-  : SparcTargetMachine(T, TT, FS, true) {
+  : SparcTargetMachine(T, TT, CPU, FS, true) {
 }
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index c4bb6bd776d4..799fc497f4ae 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,7 +34,8 @@ class SparcTargetMachine : public LLVMTargetMachine {
   SparcFrameLowering FrameLowering;
 public:
   SparcTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS, bool is64bit);
+                     const std::string &CPU, const std::string &FS,
+                     bool is64bit);
 
   virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetFrameLowering  *getFrameLowering() const {
@@ -62,7 +63,7 @@ public:
 class SparcV8TargetMachine : public SparcTargetMachine {
 public:
   SparcV8TargetMachine(const Target &T, const std::string &TT,
-                       const std::string &FS);
+                       const std::string &CPU, const std::string &FS);
 };
 
 /// SparcV9TargetMachine - Sparc 64-bit target machine
@@ -70,7 +71,7 @@ public:
 class SparcV9TargetMachine : public SparcTargetMachine {
 public:
   SparcV9TargetMachine(const Target &T, const std::string &TT,
-                       const std::string &FS);
+                       const std::string &CPU, const std::string &FS);
 };
 
 } // end namespace llvm
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 1f5d3552ae7e..f4bdbd8cd173 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -1,14 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS SystemZ.td)
 
-tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(SystemZGenRegisterNames.inc -gen-register-enums)
-tablegen(SystemZGenRegisterInfo.inc -gen-register-desc)
-tablegen(SystemZGenInstrNames.inc -gen-instr-enums)
-tablegen(SystemZGenInstrInfo.inc -gen-instr-desc)
+tablegen(SystemZGenRegisterInfo.inc -gen-register-info)
+tablegen(SystemZGenInstrInfo.inc -gen-instr-info)
 tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
 tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
 tablegen(SystemZGenCallingConv.inc -gen-callingconv)
-tablegen(SystemZGenSubtarget.inc -gen-subtarget)
+tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(SystemZCodeGen
   SystemZAsmPrinter.cpp
@@ -16,7 +13,6 @@ add_llvm_target(SystemZCodeGen
   SystemZISelLowering.cpp
   SystemZInstrInfo.cpp
   SystemZFrameLowering.cpp
-  SystemZMCAsmInfo.cpp
   SystemZRegisterInfo.cpp
   SystemZSubtarget.cpp
   SystemZTargetMachine.cpp
@@ -24,3 +20,4 @@ add_llvm_target(SystemZCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..2ac90164721f
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMSystemZDesc
+  SystemZMCTargetDesc.cpp
+  SystemZMCAsmInfo.cpp
+  )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..08f1a9d51fb5
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 2dc7e7bd29bb..8540546b62d3 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -18,6 +18,8 @@
 using namespace llvm;
 
 SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+  IsLittleEndian = false;
+  PointerSize = 8;
   PrivateGlobalPrefix = ".L";
   WeakRefDirective = "\t.weak\t";
   PCSymbol = ".";
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index a6a27e2f4b6d..a6a27e2f4b6d 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 000000000000..5a826a6ef887
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitSystemZMCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+                                      createSystemZMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+                                                     StringRef CPU,
+                                                     StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+                                          createSystemZMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSystemZMCAsmInfo() {
+  RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 000000000000..e2ad5afd6e57
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSystemZTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
index 6930e14c061e..6356491debeb 100644
--- a/lib/Target/SystemZ/Makefile
+++ b/lib/Target/SystemZ/Makefile
@@ -12,12 +12,11 @@ LIBRARYNAME = LLVMSystemZCodeGen
 TARGET = SystemZ
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \
-                SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \
-                SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \
-                SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc
+BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \
+		SystemZGenAsmWriter.inc SystemZGenDAGISel.inc \
+		SystemZGenSubtargetInfo.inc SystemZGenCallingConv.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index ea5240a10c9a..88960b9cc601 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TARGET_SystemZ_H
 #define LLVM_TARGET_SystemZ_H
 
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -47,15 +48,5 @@ namespace llvm {
   FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel);
 
-  extern Target TheSystemZTarget;
-
 } // end namespace llvm;
-
-// Defines symbolic names for SystemZ registers.
-// This defines a mapping from register name to register number.
-#include "SystemZGenRegisterNames.inc"
-
-// Defines symbolic names for the SystemZ instructions.
-#include "SystemZGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index af85df53b059..871c2972a8c4 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -142,6 +142,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
   setOperationAction(ISD::FCOS,             MVT::f64, Expand);
   setOperationAction(ISD::FREM,             MVT::f32, Expand);
   setOperationAction(ISD::FREM,             MVT::f64, Expand);
+  setOperationAction(ISD::FMA,              MVT::f32, Expand);
+  setOperationAction(ISD::FMA,              MVT::f64, Expand);
 
   // We have only 64-bit bitconverts
   setOperationAction(ISD::BITCAST,          MVT::f32, Expand);
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 2f2ef08dece1..ab45ec5984e3 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -108,11 +108,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
   MachineInstr *MI = MIB;
   MachineFunction &MF = *MI->getParent()->getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
-  const TargetInstrDesc &TID = MI->getDesc();
+  const MCInstrDesc &MCID = MI->getDesc();
   unsigned Flags = 0;
-  if (TID.mayLoad())
+  if (MCID.mayLoad())
     Flags |= MachineMemOperand::MOLoad;
-  if (TID.mayStore())
+  if (MCID.mayStore())
     Flags |= MachineMemOperand::MOStore;
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(MachinePointerInfo(
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index be5280323c34..99e2730609e8 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -16,17 +16,21 @@
 #include "SystemZInstrInfo.h"
 #include "SystemZMachineFunctionInfo.h"
 #include "SystemZTargetMachine.h"
-#include "SystemZGenInstrInfo.inc"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SystemZGenInstrInfo.inc"
+
 using namespace llvm;
 
 SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
-  : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)),
+  : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
     RI(tm, *this), TM(tm) {
 }
 
@@ -199,13 +203,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
 }
 
 bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isTerminator()) return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isTerminator()) return false;  // Only terminators can qualify.
 
   // Conditional branch is a special case.
-  if (TID.isBranch() && !TID.isBarrier())
+  if (MCID.isBranch() && !MCID.isBarrier())
     return true;
-  if (!TID.isPredicable())
+  if (!MCID.isPredicable())  // Not predicable, so necessarily unpredicated.
     return true;
   return !isPredicated(MI);
 }
@@ -343,7 +347,7 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return Count;
 }
 
-const TargetInstrDesc&
+const MCInstrDesc&
 SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
   switch (CC) {
   default:
@@ -408,7 +412,7 @@ SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
   }
 }
 
-const TargetInstrDesc&
+const MCInstrDesc&
 SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
   switch (Opc) {
   default:
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 6cb720010207..6a31e9496365 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -19,6 +19,9 @@
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
 namespace llvm {
 
 class SystemZTargetMachine;
@@ -47,7 +50,7 @@ namespace SystemZII {
   };
 }
 
-class SystemZInstrInfo : public TargetInstrInfoImpl {
+class SystemZInstrInfo : public SystemZGenInstrInfo {
   const SystemZRegisterInfo RI;
   SystemZTargetMachine &TM;
 public:
@@ -94,10 +97,10 @@ public:
 
   SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
   SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
-  const TargetInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
-  const TargetInstrDesc& getLongDispOpc(unsigned Opc) const;
+  const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+  const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
 
-  const TargetInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+  const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
     if (Offset < 0 || Offset >= 4096)
       return getLongDispOpc(Opc);
     else
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index ed62cfff08aa..59692e883366 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -25,12 +25,15 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
 using namespace llvm;
 
 SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
                                          const SystemZInstrInfo &tii)
-  : SystemZGenRegisterInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
-    TM(tm), TII(tii) {
+  : SystemZGenRegisterInfo(), TM(tm), TII(tii) {
 }
 
 const unsigned*
@@ -51,10 +54,20 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const
   BitVector Reserved(getNumRegs());
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
-  if (TFI->hasFP(MF))
+  if (TFI->hasFP(MF)) {
+    // With a frame pointer, R11D holds it: reserve R11D and all its aliases.
     Reserved.set(SystemZ::R11D);
+    Reserved.set(SystemZ::R11W);  // Member of GR32.
+    Reserved.set(SystemZ::R10P);  // Member of GR64P (even-odd pairs).
+    Reserved.set(SystemZ::R10Q);  // Member of GR128.
+  }
+
   Reserved.set(SystemZ::R14D);
   Reserved.set(SystemZ::R15D);
+  Reserved.set(SystemZ::R14W);  // Unconditionally reserved, together with
+  Reserved.set(SystemZ::R15W);  // R14D/R15D above.
+  Reserved.set(SystemZ::R14P);  // Presumably the pair registers that alias
+  Reserved.set(SystemZ::R14Q);  // R14/R15 -- TODO confirm in the .td file.
   return Reserved;
 }
 
@@ -143,6 +156,3 @@ int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
   assert(0 && "What is the dwarf register number");
   return -1;
 }
-
-
-#include "SystemZGenRegisterInfo.inc"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index cd8f20fee617..2e262e1acc30 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -15,7 +15,9 @@
 #define SystemZREGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "SystemZGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
 
 namespace llvm {
 
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 9313ffdb4a0b..a24cbcf4ccd8 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -161,318 +161,45 @@ def F15L : FPRL<15, "f15", [F15S]>;
 // Status register
 def PSW : SystemZReg<"psw">;
 
-/// Register classes
-def GR32 : RegisterClass<"SystemZ", [i32], 32,
-   // Volatile registers
-  [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W,
-   // Frame pointer, sometimes allocable
-   R11W,
-   // Volatile, but not allocable
-   R14W, R15W]>
-{
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_REG32[] = {
-      SystemZ::R1W,  SystemZ::R2W,  SystemZ::R3W,  SystemZ::R4W,
-      SystemZ::R5W,  SystemZ::R0W,  SystemZ::R12W, SystemZ::R11W,
-      SystemZ::R10W, SystemZ::R9W,  SystemZ::R8W,  SystemZ::R7W,
-      SystemZ::R6W,  SystemZ::R14W, SystemZ::R13W
-    };
-    static const unsigned SystemZ_REG32_nofp[] = {
-      SystemZ::R1W,  SystemZ::R2W,  SystemZ::R3W,  SystemZ::R4W,
-      SystemZ::R5W,  SystemZ::R0W,  SystemZ::R12W, /* No R11W */
-      SystemZ::R10W, SystemZ::R9W,  SystemZ::R8W,  SystemZ::R7W,
-      SystemZ::R6W,  SystemZ::R14W, SystemZ::R13W
-    };
-    GR32Class::iterator
-    GR32Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG32_nofp;
-      else
-        return SystemZ_REG32;
-    }
-    GR32Class::iterator
-    GR32Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned));
-      else
-        return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned));
-    }
-  }];
-}
+/// Register classes.
+/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW",  0, 5),
+                                                    (sequence "R%uW", 15, 6))>;
 
 /// Registers used to generate address. Everything except R0.
-def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
-   // Volatile registers
-  [R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W,
-   // Frame pointer, sometimes allocable
-   R11W,
-   // Volatile, but not allocable
-   R14W, R15W]>
-{
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_ADDR32[] = {
-      SystemZ::R1W,  SystemZ::R2W,  SystemZ::R3W,  SystemZ::R4W,
-      SystemZ::R5W,  /* No R0W */   SystemZ::R12W, SystemZ::R11W,
-      SystemZ::R10W, SystemZ::R9W,  SystemZ::R8W,  SystemZ::R7W,
-      SystemZ::R6W,  SystemZ::R14W, SystemZ::R13W
-    };
-    static const unsigned SystemZ_ADDR32_nofp[] = {
-      SystemZ::R1W,  SystemZ::R2W,  SystemZ::R3W,  SystemZ::R4W,
-      SystemZ::R5W,  /* No R0W */   SystemZ::R12W, /* No R11W */
-      SystemZ::R10W, SystemZ::R9W,  SystemZ::R8W,  SystemZ::R7W,
-      SystemZ::R6W,  SystemZ::R14W, SystemZ::R13W
-    };
-    ADDR32Class::iterator
-    ADDR32Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_ADDR32_nofp;
-      else
-        return SystemZ_ADDR32;
-    }
-    ADDR32Class::iterator
-    ADDR32Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned));
-      else
-        return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned));
-    }
-  }];
-}
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
 
-def GR64 : RegisterClass<"SystemZ", [i64], 64,
-   // Volatile registers
-  [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D,
-   // Frame pointer, sometimes allocable
-   R11D,
-   // Volatile, but not allocable
-   R14D, R15D]>
-{
+def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD",  0, 5),
+                                                    (sequence "R%uD", 15, 6))> {
   let SubRegClasses = [(GR32 subreg_32bit)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_REG64[] = {
-      SystemZ::R1D,  SystemZ::R2D,  SystemZ::R3D,  SystemZ::R4D,
-      SystemZ::R5D,  SystemZ::R0D,  SystemZ::R12D, SystemZ::R11D,
-      SystemZ::R10D, SystemZ::R9D,  SystemZ::R8D,  SystemZ::R7D,
-      SystemZ::R6D,  SystemZ::R14D, SystemZ::R13D
-    };
-    static const unsigned SystemZ_REG64_nofp[] = {
-      SystemZ::R1D,  SystemZ::R2D,  SystemZ::R3D,  SystemZ::R4D,
-      SystemZ::R5D,  SystemZ::R0D,  SystemZ::R12D, /* No R11D */
-      SystemZ::R10D, SystemZ::R9D,  SystemZ::R8D,  SystemZ::R7D,
-      SystemZ::R6D,  SystemZ::R14D, SystemZ::R13D
-    };
-    GR64Class::iterator
-    GR64Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG64_nofp;
-      else
-        return SystemZ_REG64;
-    }
-    GR64Class::iterator
-    GR64Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned));
-      else
-        return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned));
-    }
-  }];
 }
 
-def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
-   // Volatile registers
-  [R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D,
-   // Frame pointer, sometimes allocable
-   R11D,
-   // Volatile, but not allocable
-   R14D, R15D]>
-{
+def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
   let SubRegClasses = [(ADDR32 subreg_32bit)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_ADDR64[] = {
-      SystemZ::R1D,  SystemZ::R2D,  SystemZ::R3D,  SystemZ::R4D,
-      SystemZ::R5D,  /* No R0D */   SystemZ::R12D, SystemZ::R11D,
-      SystemZ::R10D, SystemZ::R9D,  SystemZ::R8D,  SystemZ::R7D,
-      SystemZ::R6D,  SystemZ::R14D, SystemZ::R13D
-    };
-    static const unsigned SystemZ_ADDR64_nofp[] = {
-      SystemZ::R1D,  SystemZ::R2D,  SystemZ::R3D,  SystemZ::R4D,
-      SystemZ::R5D,  /* No R0D */   SystemZ::R12D, /* No R11D */
-      SystemZ::R10D, SystemZ::R9D,  SystemZ::R8D,  SystemZ::R7D,
-      SystemZ::R6D,  SystemZ::R14D, SystemZ::R13D
-    };
-    ADDR64Class::iterator
-    ADDR64Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_ADDR64_nofp;
-      else
-        return SystemZ_ADDR64;
-    }
-    ADDR64Class::iterator
-    ADDR64Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned));
-      else
-        return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned));
-    }
-  }];
 }
 
 // Even-odd register pairs
-def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
-  [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
-{
+def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
+                                                       R12P, R10P, R8P, R6P,
+                                                       R14P)> {
   let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_REG64P[] = {
-      SystemZ::R0P,  SystemZ::R2P,  SystemZ::R4P, SystemZ::R10P,
-      SystemZ::R8P,  SystemZ::R6P };
-    static const unsigned SystemZ_REG64P_nofp[] = {
-      SystemZ::R0P,  SystemZ::R2P,  SystemZ::R4P, /* NO R10P */
-      SystemZ::R8P,  SystemZ::R6P };
-    GR64PClass::iterator
-    GR64PClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG64P_nofp;
-      else
-        return SystemZ_REG64P;
-    }
-    GR64PClass::iterator
-    GR64PClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned));
-      else
-        return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned));
-    }
-  }];
 }
 
-def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
-  [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
-{
+def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
+                                                        R12Q, R10Q, R8Q, R6Q,
+                                                        R14Q)> {
   let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
-                         (GR64 subreg_even, subreg_odd)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_REG128[] = {
-      SystemZ::R0Q,  SystemZ::R2Q,  SystemZ::R4Q,  SystemZ::R10Q,
-      SystemZ::R8Q,  SystemZ::R6Q };
-    static const unsigned SystemZ_REG128_nofp[] = {
-      SystemZ::R0Q,  SystemZ::R2Q,  SystemZ::R4Q, /* NO R10Q */
-      SystemZ::R8Q,  SystemZ::R6Q };
-    GR128Class::iterator
-    GR128Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG128_nofp;
-      else
-        return SystemZ_REG128;
-    }
-    GR128Class::iterator
-    GR128Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      if (TFI->hasFP(MF))
-        return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned));
-      else
-        return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned));
-    }
-  }];
+                       (GR64 subreg_even, subreg_odd)];
 }
 
-def FP32 : RegisterClass<"SystemZ", [f32], 32,
- [F0S, F1S,  F2S,  F3S,  F4S,  F5S,  F6S,  F7S,
-  F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_REGFP32[] = {
-      SystemZ::F0S,  SystemZ::F2S,  SystemZ::F4S,  SystemZ::F6S,
-      SystemZ::F1S,  SystemZ::F3S,  SystemZ::F5S,  SystemZ::F7S,
-      SystemZ::F8S,  SystemZ::F9S,  SystemZ::F10S, SystemZ::F11S,
-      SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S };
-    FP32Class::iterator
-    FP32Class::allocation_order_begin(const MachineFunction &MF) const {
-      return SystemZ_REGFP32;
-    }
-    FP32Class::iterator
-    FP32Class::allocation_order_end(const MachineFunction &MF) const {
-      return SystemZ_REGFP32 + (sizeof(SystemZ_REGFP32) / sizeof(unsigned));
-    }
-  }];
-}
+def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
 
-def FP64 : RegisterClass<"SystemZ", [f64], 64,
- [F0L, F1L,  F2L,  F3L,  F4L,  F5L,  F6L,  F7L, 
-  F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
+def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
   let SubRegClasses = [(FP32 subreg_32bit)];
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned SystemZ_REGFP64[] = {
-      SystemZ::F0L,  SystemZ::F2L,  SystemZ::F4L,  SystemZ::F6L,
-      SystemZ::F1L,  SystemZ::F3L,  SystemZ::F5L,  SystemZ::F7L,
-      SystemZ::F8L,  SystemZ::F9L,  SystemZ::F10L, SystemZ::F11L,
-      SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L };
-    FP64Class::iterator
-    FP64Class::allocation_order_begin(const MachineFunction &MF) const {
-      return SystemZ_REGFP64;
-    }
-    FP64Class::iterator
-    FP64Class::allocation_order_end(const MachineFunction &MF) const {
-      return SystemZ_REGFP64 + (sizeof(SystemZ_REGFP64) / sizeof(unsigned));
-    }
-  }];
 }
 
 // Status flags registers.
-def CCR : RegisterClass<"SystemZ", [i64], 64, [PSW]> {
+def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
   let CopyCost = -1;  // Don't allow copying of status registers.
 }
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index a8b5e1f18679..b3ed06639758 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -7,25 +7,32 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the SystemZ specific subclass of TargetSubtarget.
+// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "SystemZSubtarget.h"
 #include "SystemZ.h"
-#include "SystemZGenSubtarget.inc"
 #include "llvm/GlobalValue.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
 
 using namespace llvm;
 
 SystemZSubtarget::SystemZSubtarget(const std::string &TT, 
+                                   const std::string &CPU,
                                    const std::string &FS):
-  HasZ10Insts(false) {
-  std::string CPU = "z9";
+  SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
+  std::string CPUName = CPU;  // Local copy so an empty CPU can be defaulted.
+  if (CPUName.empty())
+    CPUName = "z9";  // Default CPU when the caller does not name one.
 
   // Parse features string.
-  ParseSubtargetFeatures(FS, CPU);
+  ParseSubtargetFeatures(CPUName, FS);  // Takes (CPU, FS), in that order.
 }
 
 /// True if accessing the GV requires an extra load.
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 405d6e91b7ee..55cfd80002bc 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -7,33 +7,36 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the SystemZ specific subclass of TargetSubtarget.
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
 #define LLVM_TARGET_SystemZ_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
 namespace llvm {
 class GlobalValue;
+class StringRef;
 class TargetMachine;
 
-class SystemZSubtarget : public TargetSubtarget {
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
   bool HasZ10Insts;
 public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  SystemZSubtarget(const std::string &TT, const std::string &FS);
+  SystemZSubtarget(const std::string &TT, const std::string &CPU,
+                   const std::string &FS);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool isZ10() const { return HasZ10Insts; }
 
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 160389942998..48298cc744e7 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,7 +7,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SystemZMCAsmInfo.h"
 #include "SystemZTargetMachine.h"
 #include "SystemZ.h"
 #include "llvm/PassManager.h"
@@ -17,16 +16,16 @@ using namespace llvm;
 extern "C" void LLVMInitializeSystemZTarget() {
   // Register the target.
   RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
-  RegisterAsmInfo<SystemZMCAsmInfo> Y(TheSystemZTarget);
 }
 
 /// SystemZTargetMachine ctor - Create an ILP64 architecture model
 ///
 SystemZTargetMachine::SystemZTargetMachine(const Target &T,
                                            const std::string &TT,
+                                           const std::string &CPU,
                                            const std::string &FS)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS),
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS),
     DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
                "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
     InstrInfo(*this), TLInfo(*this), TSInfo(*this),
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 524f83d13229..e40b556c0c3c 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -38,7 +38,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
   SystemZFrameLowering    FrameLowering;
 public:
   SystemZTargetMachine(const Target &T, const std::string &TT,
-                       const std::string &FS);
+                       const std::string &CPU, const std::string &FS);
 
   virtual const TargetFrameLowering *getFrameLowering() const {
     return &FrameLowering;
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 0919fe42dc0e..a42ce548c895 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -97,10 +97,6 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructT
   return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
 }
 
-void LLVMInvalidateStructLayout(LLVMTargetDataRef TD, LLVMTypeRef StructTy) {
-  unwrap(TD)->InvalidateStructLayoutInfo(unwrap<StructType>(StructTy));
-}
-
 void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
   delete unwrap(TD);
 }
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
index 6fa5420120f5..a97b0e868989 100644
--- a/lib/Target/TargetAsmInfo.cpp
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -17,11 +17,7 @@ using namespace llvm;
 
 TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
   TLOF = &TM.getTargetLowering()->getObjFileLowering();
-  const TargetData &TD = *TM.getTargetData();
-  IsLittleEndian = TD.isLittleEndian();
-  PointerSize = TD.getPointerSize();
-  const TargetFrameLowering &TFI = *TM.getFrameLowering();
-  StackDir = TFI.getStackGrowthDirection();
+  TFI = TM.getFrameLowering();
   TRI = TM.getRegisterInfo();
-  TFI.getInitialFrameState(InitialFrameState);
+  TFI->getInitialFrameState(InitialFrameState);
 }
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 1990bc7b929c..17d022a339e6 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -42,6 +42,7 @@ char TargetData::ID = 0;
 //===----------------------------------------------------------------------===//
 
 StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+  assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
   StructAlignment = 0;
   StructSize = 0;
   NumElements = ST->getNumElements();
@@ -313,63 +314,21 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
 
 namespace {
 
-class StructLayoutMap : public AbstractTypeUser {
+class StructLayoutMap {
   typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
   LayoutInfoTy LayoutInfo;
 
-  void RemoveEntry(LayoutInfoTy::iterator I, bool WasAbstract) {
-    I->second->~StructLayout();
-    free(I->second);
-    if (WasAbstract)
-      I->first->removeAbstractTypeUser(this);
-    LayoutInfo.erase(I);
-  }
-
-
-  /// refineAbstractType - The callback method invoked when an abstract type is
-  /// resolved to another type.  An object must override this method to update
-  /// its internal state to reference NewType instead of OldType.
-  ///
-  virtual void refineAbstractType(const DerivedType *OldTy,
-                                  const Type *) {
-    LayoutInfoTy::iterator I = LayoutInfo.find(cast<const StructType>(OldTy));
-    assert(I != LayoutInfo.end() && "Using type but not in map?");
-    RemoveEntry(I, true);
-  }
-
-  /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware
-  /// of is when a type makes the transition from being abstract (where it has
-  /// clients on its AbstractTypeUsers list) to concrete (where it does not).
-  /// This method notifies ATU's when this occurs for a type.
-  ///
-  virtual void typeBecameConcrete(const DerivedType *AbsTy) {
-    LayoutInfoTy::iterator I = LayoutInfo.find(cast<const StructType>(AbsTy));
-    assert(I != LayoutInfo.end() && "Using type but not in map?");
-    RemoveEntry(I, true);
-  }
-
 public:
   virtual ~StructLayoutMap() {
     // Remove any layouts.
-    for (LayoutInfoTy::iterator
-           I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) {
-      const Type *Key = I->first;
+    for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+         I != E; ++I) {
       StructLayout *Value = I->second;
-
-      if (Key->isAbstract())
-        Key->removeAbstractTypeUser(this);
-
       Value->~StructLayout();
       free(Value);
     }
   }
 
-  void InvalidateEntry(const StructType *Ty) {
-    LayoutInfoTy::iterator I = LayoutInfo.find(Ty);
-    if (I == LayoutInfo.end()) return;
-    RemoveEntry(I, Ty->isAbstract());
-  }
-
   StructLayout *&operator[](const StructType *STy) {
     return LayoutInfo[STy];
   }
@@ -404,22 +363,9 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
 
   new (L) StructLayout(Ty, *this);
 
-  if (Ty->isAbstract())
-    Ty->addAbstractTypeUser(STM);
-
   return L;
 }
 
-/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
-/// objects.  If a TargetData object is alive when types are being refined and
-/// removed, this method must be called whenever a StructType is removed to
-/// avoid a dangling pointer in this cache.
-void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
-  if (!LayoutMap) return;  // No cache.
-
-  static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty);
-}
-
 std::string TargetData::getStringRepresentation() const {
   std::string Result;
   raw_string_ostream OS(Result);
@@ -570,7 +516,7 @@ unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
 
 /// getIntPtrType - Return an unsigned integer type that is the same size or
 /// greater to the host pointer size.
-const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
+IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
   return IntegerType::get(C, getPointerSizeInBits());
 }
 
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d4b76972e49a..d52ecb32cf75 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -12,44 +12,39 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cctype>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
-//  TargetOperandInfo
+//  TargetInstrInfo
 //===----------------------------------------------------------------------===//
 
-/// getRegClass - Get the register class for the operand, handling resolution
-/// of "symbolic" pointer register classes etc.  If this is not a register
-/// operand, this returns null.
-const TargetRegisterClass *
-TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const {
-  if (isLookupPtrRegClass())
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+                             const TargetRegisterInfo *TRI) const {
+  if (OpNum >= MCID.getNumOperands())
+    return 0;
+
+  short RegClass = MCID.OpInfo[OpNum].RegClass;
+  if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
     return TRI->getPointerRegClass(RegClass);
+
   // Instructions like INSERT_SUBREG do not have fixed register classes.
   if (RegClass < 0)
     return 0;
+
   // Otherwise just look it up normally.
   return TRI->getRegClass(RegClass);
 }
 
-//===----------------------------------------------------------------------===//
-//  TargetInstrInfo
-//===----------------------------------------------------------------------===//
-
-TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
-                                 unsigned numOpcodes)
-  : Descriptors(Desc), NumOpcodes(numOpcodes) {
-}
-
-TargetInstrInfo::~TargetInstrInfo() {
-}
-
 unsigned
 TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
@@ -135,13 +130,13 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
 
 
 bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isTerminator()) return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isTerminator()) return false;
 
   // Conditional branch is a special case.
-  if (TID.isBranch() && !TID.isBarrier())
+  if (MCID.isBranch() && !MCID.isBarrier())
     return true;
-  if (!TID.isPredicable())
+  if (!MCID.isPredicable())
     return true;
   return !isPredicated(MI);
 }
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 3343384791fb..703431b3806e 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -35,38 +35,39 @@ using namespace llvm;
 //                              Generic Code
 //===----------------------------------------------------------------------===//
 
-TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
-  TextSection = 0;
-  DataSection = 0;
-  BSSSection = 0;
-  ReadOnlySection = 0;
-  StaticCtorSection = 0;
-  StaticDtorSection = 0;
-  LSDASection = 0;
-
-  CommDirectiveSupportsAlignment = true;
-  DwarfAbbrevSection = 0;
-  DwarfInfoSection = 0;
-  DwarfLineSection = 0;
-  DwarfFrameSection = 0;
-  DwarfPubNamesSection = 0;
-  DwarfPubTypesSection = 0;
-  DwarfDebugInlineSection = 0;
-  DwarfStrSection = 0;
-  DwarfLocSection = 0;
-  DwarfARangesSection = 0;
-  DwarfRangesSection = 0;
-  DwarfMacroInfoSection = 0;
-  
-  IsFunctionEHFrameSymbolPrivate = true;
-  SupportsWeakOmittedEHFrame = true;
+TargetLoweringObjectFile::TargetLoweringObjectFile() :
+  Ctx(0),
+  TextSection(0),
+  DataSection(0),
+  BSSSection(0),
+  ReadOnlySection(0),
+  StaticCtorSection(0),
+  StaticDtorSection(0),
+  LSDASection(0),
+  CompactUnwindSection(0),
+  DwarfAbbrevSection(0),
+  DwarfInfoSection(0),
+  DwarfLineSection(0),
+  DwarfFrameSection(0),
+  DwarfPubNamesSection(0),
+  DwarfPubTypesSection(0),
+  DwarfDebugInlineSection(0),
+  DwarfStrSection(0),
+  DwarfLocSection(0),
+  DwarfARangesSection(0),
+  DwarfRangesSection(0),
+  DwarfMacroInfoSection(0),
+  TLSExtraDataSection(0),
+  CommDirectiveSupportsAlignment(true),
+  SupportsWeakOmittedEHFrame(true), 
+  IsFunctionEHFrameSymbolPrivate(true) {
 }
 
 TargetLoweringObjectFile::~TargetLoweringObjectFile() {
 }
 
 static bool isSuitableForBSS(const GlobalVariable *GV) {
-  Constant *C = GV->getInitializer();
+  const Constant *C = GV->getInitializer();
 
   // Must have zero initializer.
   if (!C->isNullValue())
@@ -168,7 +169,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
     return SectionKind::getBSS();
   }
 
-  Constant *C = GVar->getInitializer();
+  const Constant *C = GVar->getInitializer();
 
   // If the global is marked constant, we can put it into a mergable section,
   // a mergable string section, or general .data if it contains relocations.
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 863b8114dc30..74a1f4e8da56 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -43,7 +43,7 @@ namespace llvm {
   Reloc::Model RelocationModel;
   CodeModel::Model CMModel;
   bool GuaranteedTailCallOpt;
-  unsigned StackAlignment;
+  unsigned StackAlignmentOverride;
   bool RealignStack;
   bool DisableJumpTables;
   bool StrongPHIElim;
@@ -183,7 +183,7 @@ EnableGuaranteedTailCallOpt("tailcallopt",
 static cl::opt<unsigned, true>
 OverrideStackAlignment("stack-alignment",
   cl::desc("Override default stack alignment"),
-  cl::location(StackAlignment),
+  cl::location(StackAlignmentOverride),
   cl::init(0));
 static cl::opt<bool, true>
 EnableRealignStack("realign-stack",
@@ -216,8 +216,9 @@ FunctionSections("ffunction-sections",
 // TargetMachine Class
 //
 
-TargetMachine::TargetMachine(const Target &T) 
-  : TheTarget(T), AsmInfo(0),
+TargetMachine::TargetMachine(const Target &T,
+                             StringRef TT, StringRef CPU, StringRef FS)
+  : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0),
     MCRelaxAll(false),
     MCNoExecStack(false),
     MCSaveTempLabels(false),
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 1c3f2dda33c7..90a8f8d8fdcc 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -20,21 +20,11 @@
 
 using namespace llvm;
 
-TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
                              regclass_iterator RCB, regclass_iterator RCE,
-                             const char *const *subregindexnames,
-                             int CFSO, int CFDO,
-                             const unsigned* subregs, const unsigned subregsize,
-                         const unsigned* aliases, const unsigned aliasessize)
-  : SubregHash(subregs), SubregHashSize(subregsize),
-    AliasesHash(aliases), AliasesHashSize(aliasessize),
-    Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
+                             const char *const *subregindexnames)
+  : InfoDesc(ID), SubRegIndexNames(subregindexnames),
     RegClassBegin(RCB), RegClassEnd(RCE) {
-  assert(isPhysicalRegister(NumRegs) &&
-         "Target has too many physical registers!");
-
-  CallFrameSetupOpcode   = CFSO;
-  CallFrameDestroyOpcode = CFDO;
 }
 
 TargetRegisterInfo::~TargetRegisterInfo() {}
@@ -83,14 +73,14 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
 /// registers for the specific register class.
 static void getAllocatableSetForRC(const MachineFunction &MF,
                                    const TargetRegisterClass *RC, BitVector &R){
-  for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-    R.set(*I);
+  ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+  for (unsigned i = 0; i != Order.size(); ++i)
+    R.set(Order[i]);
 }
 
 BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
                                           const TargetRegisterClass *RC) const {
-  BitVector Allocatable(NumRegs);
+  BitVector Allocatable(getNumRegs());
   if (RC) {
     getAllocatableSetForRC(MF, RC, Allocatable);
   } else {
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtargetInfo.cpp
index edb76f971533..59ffdea00ea6 100644
--- a/lib/Target/TargetSubtarget.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSubtarget.cpp - General Target Information -------------------==//
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,18 +11,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 //---------------------------------------------------------------------------
-// TargetSubtarget Class
+// TargetSubtargetInfo Class
 //
-TargetSubtarget::TargetSubtarget() {}
+TargetSubtargetInfo::TargetSubtargetInfo() {}
 
-TargetSubtarget::~TargetSubtarget() {}
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
 
-bool TargetSubtarget::enablePostRAScheduler(
+bool TargetSubtargetInfo::enablePostRAScheduler(
           CodeGenOpt::Level OptLevel,
           AntiDepBreakMode& Mode,
           RegClassVector& CriticalPathRCs) const {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c352bfcd8cce..d45dd352fbc4 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -15,9 +15,11 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -25,17 +27,15 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
+
 using namespace llvm;
 
 namespace {
 struct X86Operand;
 
 class X86ATTAsmParser : public TargetAsmParser {
+  MCSubtargetInfo &STI;
   MCAsmParser &Parser;
-  TargetMachine &TM;
-
-protected:
-  unsigned Is64Bit : 1;
 
 private:
   MCAsmParser &getParser() const { return Parser; }
@@ -61,6 +61,11 @@ private:
   /// or %es:(%edi) in 32bit mode.
   bool isDstOp(X86Operand &Op);
 
+  bool is64BitMode() const {
+    // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+  }
+
   /// @name Auto-generated Matcher Functions
   /// {
 
@@ -70,12 +75,11 @@ private:
   /// }
 
 public:
-  X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
-    : TargetAsmParser(T), Parser(parser), TM(TM) {
+  X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+    : TargetAsmParser(), STI(sti), Parser(parser) {
 
     // Initialize the set of available features.
-    setAvailableFeatures(ComputeAvailableFeatures(
-                           &TM.getSubtarget<X86Subtarget>()));
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
   }
   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
 
@@ -84,23 +88,6 @@ public:
 
   virtual bool ParseDirective(AsmToken DirectiveID);
 };
-
-class X86_32ATTAsmParser : public X86ATTAsmParser {
-public:
-  X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
-    : X86ATTAsmParser(T, Parser, TM) {
-    Is64Bit = false;
-  }
-};
-
-class X86_64ATTAsmParser : public X86ATTAsmParser {
-public:
-  X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
-    : X86ATTAsmParser(T, Parser, TM) {
-    Is64Bit = true;
-  }
-};
-
 } // end anonymous namespace
 
 /// @name Auto-generated Match Functions
@@ -155,7 +142,7 @@ struct X86Operand : public MCParsedAsmOperand {
   /// getEndLoc - Get the location of the last token of this operand.
   SMLoc getEndLoc() const { return EndLoc; }
 
-  virtual void dump(raw_ostream &OS) const {}
+  virtual void print(raw_ostream &OS) const {}
 
   StringRef getToken() const {
     assert(Kind == Token && "Invalid access!");
@@ -365,7 +352,7 @@ struct X86Operand : public MCParsedAsmOperand {
 } // end anonymous namespace.
 
 bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
-  unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+  unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
 
   return (Op.isMem() &&
     (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
@@ -375,7 +362,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
 }
 
 bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
-  unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+  unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
 
   return Op.isMem() && Op.Mem.SegReg == X86::ES &&
     isa<MCConstantExpr>(Op.Mem.Disp) &&
@@ -406,7 +393,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
   // FIXME: This should be done using Requires<In32BitMode> and
   // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
   // can be also checked.
-  if (RegNo == X86::RIZ && !Is64Bit)
+  if (RegNo == X86::RIZ && !is64BitMode())
     return Error(Tok.getLoc(), "riz register in 64-bit mode only");
 
   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -710,23 +697,6 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
     }
   }
 
-  // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
-  if (PatchedName.startswith("vpclmul")) {
-    unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
-      PatchedName.slice(7, PatchedName.size() - 2))
-      .Case("lqlq", 0x00) // src1[63:0],   src2[63:0]
-      .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
-      .Case("lqhq", 0x10) // src1[63:0],   src2[127:64]
-      .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
-      .Default(~0U);
-    if (CLMULQuadWordSelect != ~0U) {
-      ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
-                                          getParser().getContext());
-      assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
-      PatchedName = "vpclmulqdq";
-    }
-  }
-
   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
   if (ExtraImmOp)
@@ -843,7 +813,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
   // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
   if (Name.startswith("movs") && Operands.size() == 3 &&
       (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
-       (Is64Bit && Name == "movsq"))) {
+       (is64BitMode() && Name == "movsq"))) {
     X86Operand &Op = *(X86Operand*)Operands.begin()[1];
     X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
     if (isSrcOp(Op) && isDstOp(Op2)) {
@@ -856,7 +826,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
   // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
   if (Name.startswith("lods") && Operands.size() == 3 &&
       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
-       Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+       Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
     X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
     X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
     if (isSrcOp(*Op1) && Op2->isReg()) {
@@ -886,7 +856,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
   // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
   if (Name.startswith("stos") && Operands.size() == 3 &&
       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
-       Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+       Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
     X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
     X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
     if (isDstOp(*Op2) && Op1->isReg()) {
@@ -1161,8 +1131,8 @@ extern "C" void LLVMInitializeX86AsmLexer();
 
 // Force static initialization.
 extern "C" void LLVMInitializeX86AsmParser() {
-  RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
-  RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
+  RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+  RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
   LLVMInitializeX86AsmLexer();
 }
 
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index b5fa94f12bc7..b112f9ff69bb 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -1,18 +1,15 @@
 set(LLVM_TARGET_DEFINITIONS X86.td)
 
-tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(X86GenRegisterNames.inc -gen-register-enums)
-tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenRegisterInfo.inc -gen-register-info)
 tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
-tablegen(X86GenInstrNames.inc -gen-instr-enums)
-tablegen(X86GenInstrInfo.inc -gen-instr-desc)
+tablegen(X86GenInstrInfo.inc -gen-instr-info)
 tablegen(X86GenAsmWriter.inc -gen-asm-writer)
 tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
 tablegen(X86GenDAGISel.inc -gen-dag-isel)
 tablegen(X86GenFastISel.inc -gen-fast-isel)
 tablegen(X86GenCallingConv.inc -gen-callingconv)
-tablegen(X86GenSubtarget.inc -gen-subtarget)
+tablegen(X86GenSubtargetInfo.inc -gen-subtarget)
 tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
 
 set(sources
@@ -30,7 +27,6 @@ set(sources
   X86InstrInfo.cpp
   X86JITInfo.cpp
   X86MachObjectWriter.cpp
-  X86MCAsmInfo.cpp
   X86MCCodeEmitter.cpp 
   X86MCInstLower.cpp
   X86RegisterInfo.cpp
@@ -60,5 +56,6 @@ add_llvm_target(X86CodeGen ${sources})
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
 add_subdirectory(TargetInfo)
 add_subdirectory(Utils)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index d8a105e7e9d2..4a0d2ec727a9 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -26,7 +26,8 @@
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include "X86GenRegisterNames.inc"
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
 #include "X86GenEDInfo.inc"
 
 using namespace llvm;
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 68247d2f1a5b..c37d8797b39c 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,30 +15,23 @@
 #define DEBUG_TYPE "asm-printer"
 #include "X86ATTInstPrinter.h"
 #include "X86InstComments.h"
-#include "X86Subtarget.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
-#include "X86GenInstrNames.inc"
 #include <map>
 using namespace llvm;
 
 // Include the auto-generated portion of the assembly writer.
 #define GET_INSTRUCTION_NAME
 #define PRINT_ALIAS_INSTR
-#include "X86GenRegisterNames.inc"
 #include "X86GenAsmWriter.inc"
-#undef PRINT_ALIAS_INSTR
-#undef GET_INSTRUCTION_NAME
 
-X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
   : MCInstPrinter(MAI) {
-  // Initialize the set of available features.
-  setAvailableFeatures(ComputeAvailableFeatures(
-            &TM.getSubtarget<X86Subtarget>()));
 }
 
 void X86ATTInstPrinter::printRegName(raw_ostream &OS,
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 5f939b61da21..5426e5cf38d9 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -19,19 +19,15 @@
 namespace llvm {
 
 class MCOperand;
-class X86Subtarget;
-class TargetMachine;
   
 class X86ATTInstPrinter : public MCInstPrinter {
 public:
-  X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
+  X86ATTInstPrinter(const MCAsmInfo &MAI);
   
   virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
   virtual void printInst(const MCInst *MI, raw_ostream &OS);
   virtual StringRef getOpcodeName(unsigned Opcode) const;
 
-  // Methods used to print the alias of an instruction.
-  unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
   // Autogenerated by tblgen, returns true if we successfully printed an
   // alias.
   bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index c642acc3b9a2..4e28dfe7fa81 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -13,7 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86InstComments.h"
-#include "X86GenInstrNames.inc"
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/raw_ostream.h"
 #include "../Utils/X86ShuffleDecode.h"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 5f581bab3906..506e26cbf7cd 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,13 +15,12 @@
 #define DEBUG_TYPE "asm-printer"
 #include "X86IntelInstPrinter.h"
 #include "X86InstComments.h"
-#include "X86Subtarget.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "X86GenInstrNames.inc"
 #include <cctype>
 using namespace llvm;
 
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index c8030c3ecdac..e84a1940017d 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -20,11 +20,10 @@
 namespace llvm {
 
 class MCOperand;
-class TargetMachine;
   
 class X86IntelInstPrinter : public MCInstPrinter {
 public:
-  X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+  X86IntelInstPrinter(const MCAsmInfo &MAI)
     : MCInstPrinter(MAI) {}
 
   virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..ca88f8ffd08c
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMX86Desc
+  X86MCTargetDesc.cpp
+  X86MCAsmInfo.cpp
+  )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/X86/MCTargetDesc/Makefile b/lib/Target/X86/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..b19774ee379e
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/X86/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 2e1ec6317601..27031005bd09 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86MCAsmInfo.h"
-#include "X86TargetMachine.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -45,14 +44,17 @@ static const char *const x86_asm_table[] = {
   "{flags}", "",
   "{dirflag}", "",
   "{fpsr}", "",
+  "{fpcr}", "",
   "{cc}", "cc",
   0,0};
 
-X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+  bool is64Bit = T.getArch() == Triple::x86_64;
+  if (is64Bit)
+    PointerSize = 8;
+
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
-    
-  bool is64Bit = Triple.getArch() == Triple::x86_64;
 
   TextAlignFillValue = 0x90;
 
@@ -74,22 +76,14 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
   ExceptionsType = ExceptionHandling::DwarfCFI;
 }
 
-const MCExpr *
-X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
-                                                   unsigned Encoding,
-                                                   MCStreamer &Streamer) const {
-  MCContext &Context = Streamer.getContext();
-  const MCExpr *Res =
-    MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
-  const MCExpr *Four = MCConstantExpr::Create(4, Context);
-  return MCBinaryExpr::CreateAdd(Res, Four, Context);
-}
-
 X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
   : X86MCAsmInfoDarwin(Triple) {
 }
 
 X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+  if (T.getArch() == Triple::x86_64)
+    PointerSize = 8;
+
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
 
@@ -114,6 +108,17 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
     Data64bitsDirective = 0;
 }
 
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+                                                   unsigned Encoding,
+                                                   MCStreamer &Streamer) const {
+  MCContext &Context = Streamer.getContext();
+  const MCExpr *Res =
+    MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+  const MCExpr *Four = MCConstantExpr::Create(4, Context);
+  return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
 const MCSection *X86ELFMCAsmInfo::
 getNonexecutableStackSection(MCContext &Ctx) const {
   return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8eb30ec..2cd4c8eb30ec 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 000000000000..b77f37b03f19
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,185 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+  Triple TheTriple(TT);
+  if (TheTriple.getArch() == Triple::x86_64)
+    return "+64bit-mode";
+  return "-64bit-mode";
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments.  If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+                             unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+  #if defined(__GNUC__)
+    // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+    asm ("movq\t%%rbx, %%rsi\n\t"
+         "cpuid\n\t"
+         "xchgq\t%%rbx, %%rsi\n\t"
+         : "=a" (*rEAX),
+           "=S" (*rEBX),
+           "=c" (*rECX),
+           "=d" (*rEDX)
+         :  "a" (value));
+    return false;
+  #elif defined(_MSC_VER)
+    int registers[4];
+    __cpuid(registers, value);
+    *rEAX = registers[0];
+    *rEBX = registers[1];
+    *rECX = registers[2];
+    *rEDX = registers[3];
+    return false;
+  #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+  #if defined(__GNUC__)
+    asm ("movl\t%%ebx, %%esi\n\t"
+         "cpuid\n\t"
+         "xchgl\t%%ebx, %%esi\n\t"
+         : "=a" (*rEAX),
+           "=S" (*rEBX),
+           "=c" (*rECX),
+           "=d" (*rEDX)
+         :  "a" (value));
+    return false;
+  #elif defined(_MSC_VER)
+    __asm {
+      mov   eax,value
+      cpuid
+      mov   esi,rEAX
+      mov   dword ptr [esi],eax
+      mov   esi,rEBX
+      mov   dword ptr [esi],ebx
+      mov   esi,rECX
+      mov   dword ptr [esi],ecx
+      mov   esi,rEDX
+      mov   dword ptr [esi],edx
+    }
+    return false;
+  #endif
+#endif
+  return true;
+}
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+                               unsigned &Model) {
+  Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+  Model  = (EAX >> 4) & 0xf; // Bits 4 - 7
+  if (Family == 6 || Family == 0xf) {
+    if (Family == 0xf)
+      // Examine extended family ID if family ID is F.
+      Family += (EAX >> 20) & 0xff;    // Bits 20 - 27
+    // Examine extended model ID if family ID is 6 or F.
+    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+  }
+}
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                  StringRef FS) {
+  std::string ArchFS = X86_MC::ParseX86Triple(TT);
+  if (!FS.empty()) {
+    if (!ArchFS.empty())
+      ArchFS = ArchFS + "," + FS.str();
+    else
+      ArchFS = FS;
+  }
+
+  std::string CPUName = CPU;
+  if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+    CPUName = sys::getHostCPUName();
+#else
+    CPUName = "generic";
+#endif
+  }
+
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+  return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86MCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+                                          X86_MC::createX86MCSubtargetInfo);
+  TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+                                          X86_MC::createX86MCSubtargetInfo);
+}
+
+static MCInstrInfo *createX86MCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitX86MCInstrInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeX86MCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+  TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo() {
+  MCRegisterInfo *X = new MCRegisterInfo();
+  InitX86MCRegisterInfo(X);
+  return X;
+}
+
+extern "C" void LLVMInitializeX86MCRegInfo() {
+  TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+  TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+}
+
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+  Triple TheTriple(TT);
+
+  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+    if (TheTriple.getArch() == Triple::x86_64)
+      return new X86_64MCAsmInfoDarwin(TheTriple);
+    else
+      return new X86MCAsmInfoDarwin(TheTriple);
+  }
+
+  if (TheTriple.isOSWindows())
+    return new X86MCAsmInfoCOFF(TheTriple);
+
+  return new X86ELFMCAsmInfo(TheTriple);
+}
+
+extern "C" void LLVMInitializeX86MCAsmInfo() {
+  // Register the target asm info.
+  RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+  RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 000000000000..89ea22b31be2
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,60 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+namespace X86_MC {
+  std::string ParseX86Triple(StringRef TT);
+
+  /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+  /// the specified arguments.  If we can't run cpuid on the host, return true.
+  bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+                       unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+  void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+  /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance.
+  /// This is exposed so Asm parser, etc. do not need to go through
+  /// TargetRegistry.
+  MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+                                            StringRef FS);
+}
+
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 12fb090d4dce..949661eb99e9 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -12,14 +12,13 @@ LIBRARYNAME = LLVMX86CodeGen
 TARGET = X86
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
-                X86GenRegisterInfo.inc X86GenInstrNames.inc \
-                X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
+BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \
+		X86GenAsmWriter.inc X86GenAsmMatcher.inc \
                 X86GenAsmWriter1.inc X86GenDAGISel.inc  \
                 X86GenDisassemblerTables.inc X86GenFastISel.inc \
-                X86GenCallingConv.inc X86GenSubtarget.inc \
+                X86GenCallingConv.inc X86GenSubtargetInfo.inc \
 		X86GenEDInfo.inc
 
-DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 0ca436690040..ec52dfb3e7d1 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_X86_H
 #define TARGET_X86_H
 
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -22,10 +23,12 @@ namespace llvm {
 
 class FunctionPass;
 class JITCodeEmitter;
+class MachineCodeEmitter;
 class MCCodeEmitter;
 class MCContext;
+class MCInstrInfo;
 class MCObjectWriter;
-class MachineCodeEmitter;
+class MCSubtargetInfo;
 class Target;
 class TargetAsmBackend;
 class X86TargetMachine;
@@ -57,10 +60,9 @@ FunctionPass *createSSEDomainFixPass();
 FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
                                           JITCodeEmitter &JCE);
 
-MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
-                                         MCContext &Ctx);
-MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
-                                         MCContext &Ctx);
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+                                      const MCSubtargetInfo &STI,
+                                      MCContext &Ctx);
 
 TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
 TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
@@ -84,17 +86,6 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
                                           uint32_t CPUType,
                                           uint32_t CPUSubtype);
 
-extern Target TheX86_32Target, TheX86_64Target;
-
 } // End llvm namespace
 
-// Defines symbolic names for X86 registers.  This defines a mapping from
-// register name to register number.
-//
-#include "X86GenRegisterNames.inc"
-
-// Defines symbolic names for the X86 instructions.
-//
-#include "X86GenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 7bb96766cceb..4ccb43fe18cc 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,6 +17,13 @@
 include "llvm/Target/Target.td"
 
 //===----------------------------------------------------------------------===//
+// X86 Subtarget state.
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+                                  "64-bit mode (x86_64)">;
+
+//===----------------------------------------------------------------------===//
 // X86 Subtarget features.
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 4d7d96dcb36b..9b556a55efd9 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -194,6 +194,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
 
     // PUSH
   case X86::PUSHi8: return X86::PUSHi32;
+  case X86::PUSHi16: return X86::PUSHi32;
+  case X86::PUSH64i8: return X86::PUSH64i32;
+  case X86::PUSH64i16: return X86::PUSH64i32;
   }
 }
 
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index c2d53c4dd26c..99b4479a9fc9 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -709,13 +709,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
 //===----------------------------------------------------------------------===//
 
 static MCInstPrinter *createX86MCInstPrinter(const Target &T,
-                                             TargetMachine &TM,
                                              unsigned SyntaxVariant,
                                              const MCAsmInfo &MAI) {
   if (SyntaxVariant == 0)
-    return new X86ATTInstPrinter(TM, MAI);
+    return new X86ATTInstPrinter(MAI);
   if (SyntaxVariant == 1)
-    return new X86IntelInstPrinter(TM, MAI);
+    return new X86IntelInstPrinter(MAI);
   return 0;
 }
 
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 56351756e8dd..77b99056ae00 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -44,11 +44,11 @@ def RetCC_X86Common : CallingConv<[
   // can only be used by ABI non-compliant code. This vector type is only
   // supported while using the AVX target feature.
   CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
-            CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>,
+            CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
 
   // MMX vector types are always returned in MM0. If the target doesn't have
   // MM0, it doesn't support these vector types.
-  CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>,
+  CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
 
   // Long double types are always returned in ST0 (even with SSE).
   CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -91,10 +91,7 @@ def RetCC_X86_64_C : CallingConv<[
   CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
   CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
 
-  // MMX vector types are always returned in XMM0 except for v1i64 which is
-  // returned in RAX. This disagrees with ABI documentation but is bug
-  // compatible with gcc.
-  CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
+  // MMX vector types are always returned in XMM0.
   CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
   CCDelegateTo<RetCC_X86Common>
 ]>;
@@ -102,11 +99,7 @@ def RetCC_X86_64_C : CallingConv<[
 // X86-Win64 C return-value convention.
 def RetCC_X86_Win64_C : CallingConv<[
   // The X86-Win64 calling convention always returns __m64 values in RAX.
-  CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
-
-  // And FP in XMM0 only.
-  CCIfType<[f32], CCAssignToReg<[XMM0]>>,
-  CCIfType<[f64], CCAssignToReg<[XMM0]>>,
+  CCIfType<[x86mmx], CCBitConvertToType<i64>>,
 
   // Otherwise, everything is the same as 'normal' X86-64 C CC.
   CCDelegateTo<RetCC_X86_64_C>
@@ -150,17 +143,11 @@ def CC_X86_64_C : CallingConv<[
   // The 'nest' parameter, if any, is passed in R10.
   CCIfNest<CCAssignToReg<[R10]>>,
 
-  // The first 6 v1i64 vector arguments are passed in GPRs on Darwin.
-  CCIfType<[v1i64],
-            CCIfSubtarget<"isTargetDarwin()",
-            CCBitConvertToType<i64>>>,
-
   // The first 6 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
   CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
 
-  // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
-  // registers on Darwin.
+  // The first 8 MMX vector arguments are passed in XMM registers on Darwin.
   CCIfType<[x86mmx],
             CCIfSubtarget<"isTargetDarwin()",
             CCIfSubtarget<"hasXMMInt()",
@@ -189,10 +176,7 @@ def CC_X86_64_C : CallingConv<[
 
   // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
   CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
-           CCAssignToStack<32, 32>>,
-
-  // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
+           CCAssignToStack<32, 32>>
 ]>;
 
 // Calling convention used on Win64
@@ -210,7 +194,7 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
 
   // The first 4 MMX vector arguments are passed in GPRs.
-  CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
+  CCIfType<[x86mmx], CCBitConvertToType<i64>>,
 
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
@@ -236,10 +220,7 @@ def CC_X86_Win64_C : CallingConv<[
 
   // Long doubles get stack slots whose size and alignment depends on the
   // subtarget.
-  CCIfType<[f80], CCAssignToStack<0, 0>>,
-
-  // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
-  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
+  CCIfType<[f80], CCAssignToStack<0, 0>>
 ]>;
 
 def CC_X86_64_GHC : CallingConv<[
@@ -273,8 +254,8 @@ def CC_X86_32_Common : CallingConv<[
                 CCIfSubtarget<"hasXMMInt()",
                 CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
 
-  // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
-  // registers if the call is not a vararg call.
+  // The first 3 __m64 vector arguments are passed in mmx registers if the
+  // call is not a vararg call.
   CCIfNotVarArg<CCIfType<[x86mmx],
                 CCAssignToReg<[MM0, MM1, MM2]>>>,
 
@@ -306,7 +287,7 @@ def CC_X86_32_Common : CallingConv<[
 
   // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
   // passed in the parameter area.
-  CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>;
+  CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
 
 def CC_X86_32_C : CallingConv<[
   // Promote i8/i16 arguments to i32.
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 421e221d205c..4b11db7c0331 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -68,7 +68,7 @@ namespace {
       return "X86 Machine Code Emitter";
     }
 
-    void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc);
+    void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
     
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -132,7 +132,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
       MCE.StartMachineBasicBlock(MBB);
       for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
            I != E; ++I) {
-        const TargetInstrDesc &Desc = I->getDesc();
+        const MCInstrDesc &Desc = I->getDesc();
         emitInstruction(*I, &Desc);
         // MOVPC32r is basically a call plus a pop instruction.
         if (Desc.getOpcode() == X86::MOVPC32r)
@@ -150,7 +150,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
 /// size, and 3) use of X86-64 extended registers.
 static unsigned determineREX(const MachineInstr &MI) {
   unsigned REX = 0;
-  const TargetInstrDesc &Desc = MI.getDesc();
+  const MCInstrDesc &Desc = MI.getDesc();
   
   // Pseudo instructions do not need REX prefix byte.
   if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
@@ -161,7 +161,7 @@ static unsigned determineREX(const MachineInstr &MI) {
   unsigned NumOps = Desc.getNumOperands();
   if (NumOps) {
     bool isTwoAddr = NumOps > 1 &&
-    Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+    Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
     
     // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
     unsigned i = isTwoAddr ? 1 : 0;
@@ -598,7 +598,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
 
 template<class CodeEmitter>
 void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
-                                           const TargetInstrDesc *Desc) {
+                                           const MCInstrDesc *Desc) {
   DEBUG(dbgs() << MI);
   
   // If this is a pseudo instruction, lower it.
@@ -708,9 +708,9 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
   // If this is a two-address instruction, skip one of the register operands.
   unsigned NumOps = Desc->getNumOperands();
   unsigned CurOp = 0;
-  if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+  if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
     ++CurOp;
-  else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+  else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
     // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
     --NumOps;
 
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f1b9972530c6..21e163a30054 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -15,6 +15,7 @@
 
 #include "X86.h"
 #include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
@@ -1392,7 +1393,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
     assert(DI->getAddress() && "Null address should be checked earlier!");
     if (!X86SelectAddress(DI->getAddress(), AM))
       return false;
-    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
     // FIXME may need to add RegState::Debug to any registers produced,
     // although ESP/EBP should be the only ones at the moment.
     addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
@@ -1493,7 +1494,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     return false;
 
   // Fast-isel doesn't know about callee-pop yet.
-  if (Subtarget->IsCalleePop(isVarArg, CC))
+  if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+                       GuaranteedTailCallOpt))
     return false;
 
   // Check whether the function can return without sret-demotion.
@@ -1628,7 +1630,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
   // Issue CALLSEQ_START
-  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
     .addImm(NumBytes);
 
@@ -1801,7 +1803,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     MIB.addReg(RegArgs[i]);
 
   // Issue CALLSEQ_END
-  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   unsigned NumBytesCallee = 0;
   if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
     NumBytesCallee = 4;
@@ -1846,16 +1848,19 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     // stack, but where we prefer to use the value in xmm registers, copy it
     // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
     if ((RVLocs[i].getLocReg() == X86::ST0 ||
-         RVLocs[i].getLocReg() == X86::ST1) &&
-        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
-      CopyVT = MVT::f80;
-      CopyReg = createResultReg(X86::RFP80RegisterClass);
+         RVLocs[i].getLocReg() == X86::ST1)) {
+      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+        CopyVT = MVT::f80;
+        CopyReg = createResultReg(X86::RFP80RegisterClass);
+      }
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+              CopyReg);
+    } else {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              CopyReg).addReg(RVLocs[i].getLocReg());
+      UsedRegs.push_back(RVLocs[i].getLocReg());
     }
 
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            CopyReg).addReg(RVLocs[i].getLocReg());
-    UsedRegs.push_back(RVLocs[i].getLocReg());
-
     if (CopyVT != RVLocs[i].getValVT()) {
       // Round the F80 the right size, which also moves to the appropriate xmm
       // register. This is accomplished by storing the F80 value in memory and
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 325d0611817d..6eed6abd43e2 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -37,6 +37,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -126,10 +127,45 @@ namespace {
     void bundleCFG(MachineFunction &MF);
 
     MachineBasicBlock *MBB;     // Current basic block
+
+    // The hardware keeps track of how many FP registers are live, so we have
+    // to model that exactly. Usually, each live register corresponds to an
+    // FP<n> register, but when dealing with calls, returns, and inline
+    // assembly, it is sometimes necessary to have live scratch registers.
     unsigned Stack[8];          // FP<n> Registers in each stack slot...
-    unsigned RegMap[8];         // Track which stack slot contains each register
     unsigned StackTop;          // The current top of the FP stack.
 
+    enum {
+      NumFPRegs = 16            // Including scratch pseudo-registers.
+    };
+
+    // For each live FP<n> register, point to its Stack[] entry.
+    // The first entries correspond to FP0-FP6, the rest are scratch registers
+    // used when we need slightly different live registers than what the
+    // register allocator thinks.
+    unsigned RegMap[NumFPRegs];
+
+    // Pending fixed registers - Inline assembly needs FP registers to appear
+    // in fixed stack slot positions. This is handled by copying FP registers
+    // to ST registers before the instruction, and copying back after the
+    // instruction.
+    //
+    // This is modeled with pending ST registers. NumPendingSTs is the number
+    // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
+    // register that holds the ST value. The ST registers are not moved into
+    // place until immediately before the instruction that needs them.
+    //
+    // It can happen that we need an ST register to be live when no FP register
+    // holds the value:
+    //
+    //   %ST0 = COPY %FP4<kill>
+    //
+    // When that happens, we allocate a scratch FP register to hold the ST
+    // value. That means every register in PendingST must be live.
+
+    unsigned NumPendingSTs;
+    unsigned char PendingST[8];
+
     // Set up our stack model to match the incoming registers to MBB.
     void setupBlockStack();
 
@@ -142,13 +178,15 @@ namespace {
         dbgs() << " FP" << Stack[i];
         assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
       }
+      for (unsigned i = 0; i != NumPendingSTs; ++i)
+        dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
       dbgs() << "\n";
     }
 
     /// getSlot - Return the stack slot number a particular register number is
     /// in.
     unsigned getSlot(unsigned RegNo) const {
-      assert(RegNo < 8 && "Regno out of range!");
+      assert(RegNo < NumFPRegs && "Regno out of range!");
       return RegMap[RegNo];
     }
 
@@ -160,12 +198,17 @@ namespace {
 
     /// getScratchReg - Return an FP register that is not currently in use.
     unsigned getScratchReg() {
-      for (int i = 7; i >= 0; --i)
+      for (int i = NumFPRegs - 1; i >= 8; --i)
         if (!isLive(i))
           return i;
       llvm_unreachable("Ran out of scratch FP registers");
     }
 
+    /// isScratchReg - True if RegNo is a scratch FP register: [8, NumFPRegs).
+    bool isScratchReg(unsigned RegNo) {
+      return RegNo >= 8 && RegNo < NumFPRegs;
+    }
+
     /// getStackEntry - Return the X86::FP<n> register in register ST(i).
     unsigned getStackEntry(unsigned STi) const {
       if (STi >= StackTop)
@@ -181,7 +224,7 @@ namespace {
 
     // pushReg - Push the specified FP<n> register onto the stack.
     void pushReg(unsigned Reg) {
-      assert(Reg < 8 && "Register number out of range!");
+      assert(Reg < NumFPRegs && "Register number out of range!");
       if (StackTop >= 8)
         report_fatal_error("Stack overflow!");
       Stack[StackTop] = Reg;
@@ -236,7 +279,7 @@ namespace {
     /// Adjust the live registers to be the set in Mask.
     void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
 
-    /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is
+    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
     /// st(0), FP reg FixStack[1] is st(1) etc.
     void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
                          MachineBasicBlock::iterator I);
@@ -251,7 +294,14 @@ namespace {
     void handleCondMovFP(MachineBasicBlock::iterator &I);
     void handleSpecialFP(MachineBasicBlock::iterator &I);
 
-    bool translateCopy(MachineInstr*);
+    // Check if a COPY instruction is using FP registers.
+    bool isFPCopy(MachineInstr *MI) {
+      unsigned DstReg = MI->getOperand(0).getReg();
+      unsigned SrcReg = MI->getOperand(1).getReg();
+
+      return X86::RFP80RegClass.contains(DstReg) ||
+        X86::RFP80RegClass.contains(SrcReg);
+    }
   };
   char FPS::ID = 0;
 }
@@ -341,6 +391,7 @@ void FPS::bundleCFG(MachineFunction &MF) {
 bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
   bool Changed = false;
   MBB = &BB;
+  NumPendingSTs = 0;
 
   setupBlockStack();
 
@@ -352,7 +403,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
     if (MI->isInlineAsm())
       FPInstClass = X86II::SpecialFP;
 
-    if (MI->isCopy() && translateCopy(MI))
+    if (MI->isCopy() && isFPCopy(MI))
       FPInstClass = X86II::SpecialFP;
 
     if (FPInstClass == X86II::NotFP)
@@ -833,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
   // Kill registers by popping.
   if (Kills && I != MBB->begin()) {
     MachineBasicBlock::iterator I2 = llvm::prior(I);
-    for (;;) {
+    while (StackTop) {
       unsigned KReg = getStackEntry(0);
       if (!(Kills & (1 << KReg)))
         break;
@@ -881,7 +932,8 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
       continue;
     // (Reg st0) (OldReg st0) = (Reg OldReg st0)
     moveToTop(Reg, I);
-    moveToTop(OldReg, I);
+    if (FixCount > 0)
+      moveToTop(OldReg, I);
   }
   DEBUG(dumpStack());
 }
@@ -1239,141 +1291,307 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   MachineInstr *MI = I;
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SpecialFP instruction!");
-  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
-  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
-  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
-    assert(StackTop == 0 && "Stack should be empty after a call!");
-    pushReg(getFPReg(MI->getOperand(0)));
-    break;
-  case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
-  case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
-  case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
-    // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
-    // The pattern we expect is:
-    //  CALL
-    //  FP1 = FpGET_ST0
-    //  FP4 = FpGET_ST1
-    //
-    // At this point, we've pushed FP1 on the top of stack, so it should be
-    // present if it isn't dead.  If it was dead, we already emitted a pop to
-    // remove it from the stack and StackTop = 0.
-    
-    // Push FP4 as top of stack next.
-    pushReg(getFPReg(MI->getOperand(0)));
+  case TargetOpcode::COPY: {
+    // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
+    const MachineOperand &MO1 = MI->getOperand(1);
+    const MachineOperand &MO0 = MI->getOperand(0);
+    unsigned DstST = MO0.getReg() - X86::ST0;
+    unsigned SrcST = MO1.getReg() - X86::ST0;
+    bool KillsSrc = MI->killsRegister(MO1.getReg());
+
+    // ST = COPY FP. Set up a pending ST register.
+    if (DstST < 8) {
+      unsigned SrcFP = getFPReg(MO1);
+      assert(isLive(SrcFP) && "Cannot copy dead register");
+      assert(!MO0.isDead() && "Cannot copy to dead ST register");
+
+      // Unallocated STs are marked with the out-of-range value NumFPRegs.
+      while (NumPendingSTs <= DstST)
+        PendingST[NumPendingSTs++] = NumFPRegs;
+
+      // STi could still be live from a previous inline asm.
+      if (isScratchReg(PendingST[DstST])) {
+        DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
+                     << '\n');
+        freeStackSlotBefore(MI, PendingST[DstST]);
+      }
 
-    // If StackTop was 0 before we pushed our operand, then ST(0) must have been
-    // dead.  In this case, the ST(1) value is the only thing that is live, so
-    // it should be on the TOS (after the pop that was emitted) and is.  Just
-    // continue in this case.
-    if (StackTop == 1)
+      // When the source is killed, allocate a scratch FP register.
+      if (KillsSrc) {
+        unsigned Slot = getSlot(SrcFP);
+        unsigned SR = getScratchReg();
+        PendingST[DstST] = SR;
+        Stack[Slot] = SR;
+        RegMap[SR] = Slot;
+      } else
+        PendingST[DstST] = SrcFP;
       break;
-    
-    // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
-    // elements so that our accounting is correct.
-    unsigned RegOnTop = getStackEntry(0);
-    unsigned RegNo = getStackEntry(1);
-    
-    // Swap the slots the regs are in.
-    std::swap(RegMap[RegNo], RegMap[RegOnTop]);
-    
-    // Swap stack slot contents.
-    if (RegMap[RegOnTop] >= StackTop)
-      report_fatal_error("Access past stack top!");
-    std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
-    break;
-  }
-  case X86::FpSET_ST0_32:
-  case X86::FpSET_ST0_64:
-  case X86::FpSET_ST0_80: {
-    // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
-    // arguments that use an st constraint. We expect a sequence of
-    // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
-    unsigned Op0 = getFPReg(MI->getOperand(0));
-
-    if (!MI->killsRegister(X86::FP0 + Op0)) {
-      // Duplicate Op0 into a temporary on the stack top.
-      duplicateToTop(Op0, getScratchReg(), I);
-    } else {
-      // Op0 is killed, so just swap it into position.
-      moveToTop(Op0, I);
     }
-    --StackTop;   // "Forget" we have something on the top of stack!
-    break;
-  }
-  case X86::FpSET_ST1_32:
-  case X86::FpSET_ST1_64:
-  case X86::FpSET_ST1_80: {
-    // Set up st(1) for inline asm. We are assuming that st(0) has already been
-    // set up by FpSET_ST0, and our StackTop is off by one because of it.
-    unsigned Op0 = getFPReg(MI->getOperand(0));
-    // Restore the actual StackTop from before Fp_SET_ST0.
-    // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
-    // are not enforcing the constraint.
-    ++StackTop;
-    unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
-    if (!MI->killsRegister(X86::FP0 + Op0)) {
-      duplicateToTop(Op0, getScratchReg(), I);
-      moveToTop(RegOnTop, I);
-    } else if (getSTReg(Op0) != X86::ST1) {
-      // We have the wrong value at st(1). Shuffle! Untested!
-      moveToTop(getStackEntry(1), I);
-      moveToTop(Op0, I);
-      moveToTop(RegOnTop, I);
+
+    // FP = COPY ST. Extract fixed stack value.
+    // Any instruction defining ST registers must have assigned them to a
+    // scratch register.
+    if (SrcST < 8) {
+      unsigned DstFP = getFPReg(MO0);
+      assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
+      assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
+      unsigned SrcFP = PendingST[SrcST];
+      assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
+      assert(isLive(SrcFP) && "Scratch holding ST is dead");
+
+      // DstFP steals the stack slot from SrcFP.
+      unsigned Slot = getSlot(SrcFP);
+      Stack[Slot] = DstFP;
+      RegMap[DstFP] = Slot;
+
+      // Always treat the ST as killed.
+      PendingST[SrcST] = NumFPRegs;
+      while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+        --NumPendingSTs;
+      break;
     }
-    assert(StackTop >= 2 && "Too few live registers");
-    StackTop -= 2; // "Forget" both st(0) and st(1).
-    break;
-  }
-  case X86::MOV_Fp3232:
-  case X86::MOV_Fp3264:
-  case X86::MOV_Fp6432:
-  case X86::MOV_Fp6464: 
-  case X86::MOV_Fp3280:
-  case X86::MOV_Fp6480:
-  case X86::MOV_Fp8032:
-  case X86::MOV_Fp8064: 
-  case X86::MOV_Fp8080: {
-    const MachineOperand &MO1 = MI->getOperand(1);
-    unsigned SrcReg = getFPReg(MO1);
 
-    const MachineOperand &MO0 = MI->getOperand(0);
-    unsigned DestReg = getFPReg(MO0);
-    if (MI->killsRegister(X86::FP0+SrcReg)) {
+    // FP <- FP copy.
+    unsigned DstFP = getFPReg(MO0);
+    unsigned SrcFP = getFPReg(MO1);
+    assert(isLive(SrcFP) && "Cannot copy dead register");
+    if (KillsSrc) {
       // If the input operand is killed, we can just change the owner of the
       // incoming stack slot into the result.
-      unsigned Slot = getSlot(SrcReg);
-      assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
-      Stack[Slot] = DestReg;
-      RegMap[DestReg] = Slot;
-
+      unsigned Slot = getSlot(SrcFP);
+      Stack[Slot] = DstFP;
+      RegMap[DstFP] = Slot;
     } else {
-      // For FMOV we just duplicate the specified value to a new stack slot.
+      // For COPY we just duplicate the specified value to a new stack slot.
       // This could be made better, but would require substantial changes.
-      duplicateToTop(SrcReg, DestReg, I);
+      duplicateToTop(SrcFP, DstFP, I);
     }
+    break;
+  }
+
+  case X86::FpPOP_RETVAL: {
+    // The FpPOP_RETVAL instruction is used after calls that return a value on
+    // the floating point stack. We cannot model this with ST defs since CALL
+    // instructions have fixed clobber lists. This instruction is interpreted
+    // to mean that there is one more live register on the stack than we
+    // thought.
+    //
+    // This means that StackTop does not match the hardware stack between a
+    // call and the FpPOP_RETVAL instructions.  We do tolerate FP instructions
+    // between CALL and FpPOP_RETVAL as long as they don't overflow the
+    // hardware stack.
+    unsigned DstFP = getFPReg(MI->getOperand(0));
+
+    // Move existing stack elements up to reflect reality.
+    assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+    if (StackTop) {
+      std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+      for (unsigned i = 0; i != NumFPRegs; ++i)
+        ++RegMap[i];
     }
+    ++StackTop;
+
+    // DstFP is the new bottom of the stack.
+    Stack[0] = DstFP;
+    RegMap[DstFP] = 0;
+
+    // DstFP will be killed by processBasicBlock if this was a dead def.
     break;
+  }
+
   case TargetOpcode::INLINEASM: {
     // The inline asm MachineInstr currently only *uses* FP registers for the
     // 'f' constraint.  These should be turned into the current ST(x) register
-    // in the machine instr.  Also, any kills should be explicitly popped after
-    // the inline asm.
-    unsigned Kills = 0;
+    // in the machine instr.
+    //
+    // There are special rules for x87 inline assembly. The compiler must know
+    // exactly how many registers are popped and pushed implicitly by the asm.
+    // Otherwise it is not possible to restore the stack state after the inline
+    // asm.
+    //
+    // There are 3 kinds of input operands:
+    //
+    // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+    //    popped input operand must be in a fixed stack slot, and it is either
+    //    tied to an output operand, or in the clobber list. The MI has ST use
+    //    and def operands for these inputs.
+    //
+    // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+    //    preserved by the inline asm. The fixed stack slots must be STn-STm
+    //    following the popped inputs. A fixed input operand cannot be tied to
+    //    an output or appear in the clobber list. The MI has ST use operands
+    //    and no defs for these inputs.
+    //
+    // 3. Preserved inputs. These inputs use the "f" constraint which is
+    //    represented as an FP register. The inline asm won't change these
+    //    stack slots.
+    //
+    // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+    // registers do not count as output operands. The inline asm changes the
+    // stack as if it popped all the popped inputs and then pushed all the
+    // output operands.
+
+    // Scan the assembly for ST registers used, defined and clobbered. We can
+    // only tell clobbers from defs by looking at the asm descriptor.
+    unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+    unsigned NumOps = 0;
+    for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+         i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+      unsigned Flags = MI->getOperand(i).getImm();
+      NumOps = InlineAsm::getNumOperandRegisters(Flags);
+      if (NumOps != 1)
+        continue;
+      const MachineOperand &MO = MI->getOperand(i + 1);
+      if (!MO.isReg())
+        continue;
+      unsigned STReg = MO.getReg() - X86::ST0;
+      if (STReg >= 8)
+        continue;
+
+      switch (InlineAsm::getKind(Flags)) {
+      case InlineAsm::Kind_RegUse:
+        STUses |= (1u << STReg);
+        break;
+      case InlineAsm::Kind_RegDef:
+      case InlineAsm::Kind_RegDefEarlyClobber:
+        STDefs |= (1u << STReg);
+        if (MO.isDead())
+          STDeadDefs |= (1u << STReg);
+        break;
+      case InlineAsm::Kind_Clobber:
+        STClobbers |= (1u << STReg);
+        break;
+      default:
+        break;
+      }
+    }
+
+    if (STUses && !isMask_32(STUses))
+      MI->emitError("fixed input regs must be last on the x87 stack");
+    unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+    // Defs must be contiguous from the stack top. ST0-STn.
+    if (STDefs && !isMask_32(STDefs)) {
+      MI->emitError("output regs must be last on the x87 stack");
+      STDefs = NextPowerOf2(STDefs) - 1;
+    }
+    unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+    // So must the clobbered stack slots. ST0-STm, m >= n.
+    if (STClobbers && !isMask_32(STDefs | STClobbers))
+      MI->emitError("clobbers must be last on the x87 stack");
+
+    // Popped inputs are the ones that are also clobbered or defined.
+    unsigned STPopped = STUses & (STDefs | STClobbers);
+    if (STPopped && !isMask_32(STPopped))
+      MI->emitError("implicitly popped regs must be last on the x87 stack");
+    unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+    DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+                 << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+    // Scan the instruction for FP uses corresponding to "f" constraints.
+    // Collect FP registers to kill after the instruction.
+    // Always kill all the scratch regs.
+    unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+    unsigned FPUsed = 0;
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &Op = MI->getOperand(i);
       if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
         continue;
-      assert(Op.isUse() && "Only handle inline asm uses right now");
-      
+      if (!Op.isUse())
+        MI->emitError("illegal \"f\" output constraint");
       unsigned FPReg = getFPReg(Op);
-      Op.setReg(getSTReg(FPReg));
-      
+      FPUsed |= 1U << FPReg;
+
       // If we kill this operand, make sure to pop it from the stack after the
       // asm.  We just remember it for now, and pop them all off at the end in
       // a batch.
       if (Op.isKill())
-        Kills |= 1U << FPReg;
+        FPKills |= 1U << FPReg;
+    }
+
+    // The popped inputs will be killed by the instruction, so duplicate them
+    // if the FP register needs to be live after the instruction, or if it is
+    // used in the instruction itself. We effectively treat the popped inputs
+    // as early clobbers.
+    for (unsigned i = 0; i < NumSTPopped; ++i) {
+      if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+        continue;
+      unsigned SR = getScratchReg();
+      duplicateToTop(PendingST[i], SR, I);
+      DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+                   << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+      PendingST[i] = SR;
+    }
+
+    // Make sure we have a unique live register for every fixed use. Some of
+    // them could be undef uses, and we need to emit LD_F0 instructions.
+    for (unsigned i = 0; i < NumSTUses; ++i) {
+      if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+        // Check for shared assignments.
+        for (unsigned j = 0; j < i; ++j) {
+          if (PendingST[j] != PendingST[i])
+            continue;
+          // STi and STj are in the same register, create a copy.
+          unsigned SR = getScratchReg();
+          duplicateToTop(PendingST[i], SR, I);
+          DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+                       << unsigned(PendingST[i])
+                       << " to avoid collision with ST" << j << '\n');
+          PendingST[i] = SR;
+        }
+        continue;
+      }
+      unsigned SR = getScratchReg();
+      DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+      BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+      pushReg(SR);
+      PendingST[i] = SR;
+      if (NumPendingSTs == i)
+        ++NumPendingSTs;
+    }
+    assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+    // Now we can rearrange the live registers to match what was requested.
+    shuffleStackTop(PendingST, NumPendingSTs, I);
+    DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+    // With the stack layout fixed, rewrite the FP registers.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &Op = MI->getOperand(i);
+      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+        continue;
+      unsigned FPReg = getFPReg(Op);
+      Op.setReg(getSTReg(FPReg));
+    }
+
+    // Simulate the inline asm popping its inputs and pushing its outputs.
+    StackTop -= NumSTPopped;
+
+    // Hold the fixed output registers in scratch FP registers. They will be
+    // transferred to real FP registers by copies.
+    NumPendingSTs = 0;
+    for (unsigned i = 0; i < NumSTDefs; ++i) {
+      unsigned SR = getScratchReg();
+      pushReg(SR);
+      FPKills &= ~(1u << SR);
+    }
+    for (unsigned i = 0; i < NumSTDefs; ++i)
+      PendingST[NumPendingSTs++] = getStackEntry(i);
+    DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+    // If any of the ST defs were dead, pop them immediately. Our caller only
+    // handles dead FP defs.
+    MachineBasicBlock::iterator InsertPt = MI;
+    for (unsigned i = 0; STDefs & (1u << i); ++i) {
+      if (!(STDeadDefs & (1u << i)))
+        continue;
+      freeStackSlotAfter(InsertPt, PendingST[i]);
+      PendingST[i] = NumFPRegs;
     }
+    while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+      --NumPendingSTs;
 
     // If this asm kills any FP registers (is the last use of them) we must
     // explicitly emit pop instructions for them.  Do this now after the asm has
@@ -1382,16 +1600,16 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
     //
     // Note: this might be a non-optimal pop sequence.  We might be able to do
     // better by trying to pop in stack order or something.
-    MachineBasicBlock::iterator InsertPt = MI;
-    while (Kills) {
-      unsigned FPReg = CountTrailingZeros_32(Kills);
-      freeStackSlotAfter(InsertPt, FPReg);
-      Kills &= ~(1U << FPReg);
+    while (FPKills) {
+      unsigned FPReg = CountTrailingZeros_32(FPKills);
+      if (isLive(FPReg))
+        freeStackSlotAfter(InsertPt, FPReg);
+      FPKills &= ~(1U << FPReg);
     }
     // Don't delete the inline asm!
     return;
   }
-      
+
   case X86::RET:
   case X86::RETI:
     // If RET has an FP register use operand, pass the first one in ST(0) and
@@ -1489,33 +1707,3 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   } else
     --I;
 }
-
-// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
-bool FPS::translateCopy(MachineInstr *MI) {
-  unsigned DstReg = MI->getOperand(0).getReg();
-  unsigned SrcReg = MI->getOperand(1).getReg();
-
-  if (DstReg == X86::ST0) {
-    MI->setDesc(TII->get(X86::FpSET_ST0_80));
-    MI->RemoveOperand(0);
-    return true;
-  }
-  if (DstReg == X86::ST1) {
-    MI->setDesc(TII->get(X86::FpSET_ST1_80));
-    MI->RemoveOperand(0);
-    return true;
-  }
-  if (SrcReg == X86::ST0) {
-    MI->setDesc(TII->get(X86::FpGET_ST0_80));
-    return true;
-  }
-  if (SrcReg == X86::ST1) {
-    MI->setDesc(TII->get(X86::FpGET_ST1_80));
-    return true;
-  }
-  if (X86::RFP80RegClass.contains(DstReg, SrcReg)) {
-    MI->setDesc(TII->get(X86::MOV_Fp8080));
-    return true;
-  }
-  return false;
-}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 2e95300160d8..ed45a9a4c1c0 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====//
+//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
@@ -160,8 +161,10 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
         Opc = isSub
           ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
           : (Is64Bit ? X86::POP64r  : X86::POP32r);
-        BuildMI(MBB, MBBI, DL, TII.get(Opc))
+        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
           .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+        if (isSub)
+          MI->setFlag(MachineInstr::FrameSetup);
         Offset -= ThisVal;
         continue;
       }
@@ -171,6 +174,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
       BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
       .addReg(StackPtr)
       .addImm(ThisVal);
+    if (isSub)
+      MI->setFlag(MachineInstr::FrameSetup);
     MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
     Offset -= ThisVal;
   }
@@ -409,7 +414,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
               TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
               StackPtr)
         .addReg(StackPtr)
-        .addImm(-TailCallReturnAddrDelta);
+        .addImm(-TailCallReturnAddrDelta)
+        .setMIFlag(MachineInstr::FrameSetup);
     MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
   }
 
@@ -447,7 +453,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
 
     // Save EBP/RBP into the appropriate stack slot.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
-      .addReg(FramePtr, RegState::Kill);
+      .addReg(FramePtr, RegState::Kill)
+      .setMIFlag(MachineInstr::FrameSetup);
 
     if (needsFrameMoves) {
       // Mark the place where EBP/RBP was saved.
@@ -474,7 +481,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
     // Update EBP with the new base value...
     BuildMI(MBB, MBBI, DL,
             TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
-        .addReg(StackPtr);
+        .addReg(StackPtr)
+        .setMIFlag(MachineInstr::FrameSetup);
 
     if (needsFrameMoves) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -642,7 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
 }
 
 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
-                                MachineBasicBlock &MBB) const {
+                                    MachineBasicBlock &MBB) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
@@ -919,7 +927,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
     CalleeFrameSize += SlotSize;
-    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+      .setMIFlag(MachineInstr::FrameSetup);
   }
 
   X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
@@ -1021,3 +1030,181 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     FrameIdx = 0;
   }
 }
+
+/// permuteEncode - Create the permutation encoding used with frameless
+/// stacks. It is passed the number of registers to be saved and an array of the
+/// registers saved.
+static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
+  // The saved registers are numbered from 1 to 6. In order to encode the order
+  // in which they were saved, we re-number them according to their place in the
+  // register order. The re-numbering is relative to the last re-numbered
+  // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
+  //
+  //    Orig  Re-Num
+  //    ----  ------
+  //     6       6
+  //     2       2
+  //     4       3
+  //     5       3
+  //
+  bool Used[7] = { false, false, false, false, false, false, false };
+  uint32_t RenumRegs[6];
+  for (unsigned I = 0; I < SavedCount; ++I) {
+    uint32_t Renum = 0;
+    for (unsigned U = 1; U < 7; ++U) {
+      if (U == Registers[I])
+        break;
+      if (!Used[U])
+        ++Renum;
+    }
+
+    Used[Registers[I]] = true;
+    RenumRegs[I] = Renum;
+  }
+
+  // Take the renumbered values and encode them into a 10-bit number.
+  uint32_t permutationEncoding = 0;
+  switch (SavedCount) {
+  case 6:
+    permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+                           + 6 * RenumRegs[2] +  2 * RenumRegs[3]
+                           +     RenumRegs[4];
+    break;
+  case 5:
+    permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+                           + 6 * RenumRegs[2] +  2 * RenumRegs[3]
+                           +     RenumRegs[4];
+    break;
+  case 4:
+    permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
+                          + 3 * RenumRegs[2] +      RenumRegs[3];
+    break;
+  case 3:
+    permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
+                              + RenumRegs[2];
+    break;
+  case 2:
+    permutationEncoding |=  5 * RenumRegs[0] +     RenumRegs[1];
+    break;
+  case 1:
+    permutationEncoding |=      RenumRegs[0];
+    break;
+  }
+
+  return permutationEncoding;
+}
+
+uint32_t X86FrameLowering::
+getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+                         int DataAlignmentFactor, bool IsEH) const {
+  uint32_t Encoding = 0;
+  int CFAOffset = 0;
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+  unsigned SavedRegIdx = 0;
+  int FramePointerReg = -1;
+
+  for (ArrayRef<MCCFIInstruction>::const_iterator
+         I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+    const MCCFIInstruction &Inst = *I;
+    MCSymbol *Label = Inst.getLabel();
+
+    // Ignore invalid labels.
+    if (Label && !Label->isDefined()) continue;
+
+    unsigned Operation = Inst.getOperation();
+    if (Operation != MCCFIInstruction::Move &&
+        Operation != MCCFIInstruction::RelMove)
+      // FIXME: We can't handle this frame just yet.
+      return 0;
+
+    const MachineLocation &Dst = Inst.getDestination();
+    const MachineLocation &Src = Inst.getSource();
+    const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
+
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      if (Src.getReg() != MachineLocation::VirtualFP) {
+        // DW_CFA_def_cfa
+        assert(FramePointerReg == -1 &&"Defining more than one frame pointer?");
+        if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
+            TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
+          // The frame pointer isn't EBP/RBP. Cannot make unwind information
+          // compact.
+          return 0;
+        FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
+      } // else DW_CFA_def_cfa_offset
+
+      if (IsRelative)
+        CFAOffset += Src.getOffset();
+      else
+        CFAOffset -= Src.getOffset();
+
+      continue;
+    }
+
+    if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+      // DW_CFA_def_cfa_register
+      assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+
+      if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
+          TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
+        // The frame pointer isn't EBP/RBP. Cannot make unwind information
+        // compact.
+        return 0;
+
+      FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
+      if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
+        return 0;
+
+      SavedRegs[0] = 0;
+      SavedRegIdx = 0;
+      continue;
+    }
+
+    unsigned Reg = Src.getReg();
+    int Offset = Dst.getOffset();
+    if (IsRelative)
+      Offset -= CFAOffset;
+    Offset /= DataAlignmentFactor;
+
+    if (Offset < 0) {
+      // FIXME: Handle?
+      // DW_CFA_offset_extended_sf
+      return 0;
+    } else if (Reg < 64) {
+      // DW_CFA_offset + Reg
+      if (SavedRegIdx >= 6) return 0;
+      int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
+      if (CURegNum == -1) return 0;
+      SavedRegs[SavedRegIdx++] = CURegNum;
+    } else {
+      // FIXME: Handle?
+      // DW_CFA_offset_extended
+      return 0;
+    }
+  }
+
+  // Bail if there are too many registers to encode.
+  if (SavedRegIdx > 6) return 0;
+
+  // Check if the offset is too big.
+  CFAOffset /= 4;
+  if ((CFAOffset & 0xFF) != CFAOffset)
+    return 0;
+  Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
+
+  if (FramePointerReg != -1) {
+    Encoding |= 0x01000000;     // EBP/RBP Unwind Frame
+    for (unsigned I = 0; I != SavedRegIdx; ++I) {
+      unsigned Reg = SavedRegs[I];
+      if (Reg == unsigned(FramePointerReg)) continue;
+      Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
+    }
+  } else {
+    Encoding |= 0x02000000;     // Frameless unwind with small stack
+    Encoding |= (SavedRegIdx & 0x7) << 10;
+    Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+  }
+
+  return Encoding;
+}
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index d71108cd0586..14c31ed47cf1 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -15,6 +15,7 @@
 #define X86_FRAMELOWERING_H
 
 #include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
@@ -58,6 +59,9 @@ public:
 
   void getInitialFrameState(std::vector<MachineMove> &Moves) const;
   int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+  uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+                                    int DataAlignmentFactor, bool IsEH) const;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1fcc274e0f85..2b0f283bec75 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Type.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -191,6 +192,7 @@ namespace {
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
     SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
 
+    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
     bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
@@ -546,6 +548,34 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
       EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
 }
 
+static bool isDispSafeForFrameIndex(int64_t Val) {
+  // On 64-bit platforms, we can run into an issue where a frame index
+  // includes a displacement that, when added to the explicit displacement,
+  // will overflow the displacement field. Assuming that the frame index
+  // displacement fits into a 31-bit integer (which is only slightly more
+  // aggressive than the current fundamental assumption that it fits into
+  // a 32-bit integer), a 31-bit disp should always be safe.
+  return isInt<31>(Val);
+}
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+                                            X86ISelAddressMode &AM) {
+  int64_t Val = AM.Disp + Offset;
+  CodeModel::Model M = TM.getCodeModel();
+  if (Subtarget->is64Bit()) {
+    if (!X86::isOffsetSuitableForCodeModel(Val, M,
+                                           AM.hasSymbolicDisplacement()))
+      return true;
+    // In addition to the checks required for a register base, check that
+    // we do not try to use an unsafe Disp with a frame index.
+    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+        !isDispSafeForFrameIndex(Val))
+      return true;
+  }
+  AM.Disp = Val;
+  return false;
+
+}
 
 bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
   SDValue Address = N->getOperand(1);
@@ -595,18 +625,22 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
       // must allow RIP.
       !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
-      int64_t Offset = AM.Disp + G->getOffset();
-      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
+      X86ISelAddressMode Backup = AM;
       AM.GV = G->getGlobal();
-      AM.Disp = Offset;
       AM.SymbolFlags = G->getTargetFlags();
+      if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+        AM = Backup;
+        return true;
+      }
     } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
-      int64_t Offset = AM.Disp + CP->getOffset();
-      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
+      X86ISelAddressMode Backup = AM;
       AM.CP = CP->getConstVal();
       AM.Align = CP->getAlignment();
-      AM.Disp = Offset;
       AM.SymbolFlags = CP->getTargetFlags();
+      if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+        AM = Backup;
+        return true;
+      }
     } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
       AM.ES = S->getSymbol();
       AM.SymbolFlags = S->getTargetFlags();
@@ -688,7 +722,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
 
 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                               unsigned Depth) {
-  bool is64Bit = Subtarget->is64Bit();
   DebugLoc dl = N.getDebugLoc();
   DEBUG({
       dbgs() << "MatchAddress: ";
@@ -698,8 +731,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   if (Depth > 5)
     return MatchAddressBase(N, AM);
 
-  CodeModel::Model M = TM.getCodeModel();
-
   // If this is already a %rip relative address, we can only merge immediates
   // into it.  Instead of handling this in every case, we handle it here.
   // RIP relative addressing: %rip + 32-bit displacement!
@@ -709,14 +740,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     // consistency.
     if (!AM.ES && AM.JT != -1) return true;
 
-    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
-      int64_t Val = AM.Disp + Cst->getSExtValue();
-      if (X86::isOffsetSuitableForCodeModel(Val, M,
-                                            AM.hasSymbolicDisplacement())) {
-        AM.Disp = Val;
+    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
         return false;
-      }
-    }
     return true;
   }
 
@@ -724,12 +750,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   default: break;
   case ISD::Constant: {
     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
-    if (!is64Bit ||
-        X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
-                                          AM.hasSymbolicDisplacement())) {
-      AM.Disp += Val;
+    if (!FoldOffsetIntoAddress(Val, AM))
       return false;
-    }
     break;
   }
 
@@ -745,8 +767,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     break;
 
   case ISD::FrameIndex:
-    if (AM.BaseType == X86ISelAddressMode::RegBase
-        && AM.Base_Reg.getNode() == 0) {
+    if (AM.BaseType == X86ISelAddressMode::RegBase &&
+        AM.Base_Reg.getNode() == 0 &&
+        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
       AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
       return false;
@@ -775,16 +798,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
           AM.IndexReg = ShVal.getNode()->getOperand(0);
           ConstantSDNode *AddVal =
             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
-          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
-          if (!is64Bit ||
-              X86::isOffsetSuitableForCodeModel(Disp, M,
-                                                AM.hasSymbolicDisplacement()))
-            AM.Disp = Disp;
-          else
-            AM.IndexReg = ShVal;
-        } else {
-          AM.IndexReg = ShVal;
+          uint64_t Disp = AddVal->getSExtValue() << Val;
+          if (!FoldOffsetIntoAddress(Disp, AM))
+            return false;
         }
+
+        AM.IndexReg = ShVal;
         return false;
       }
     break;
@@ -818,13 +837,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
             Reg = MulVal.getNode()->getOperand(0);
             ConstantSDNode *AddVal =
               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
-            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
-                                      CN->getZExtValue();
-            if (!is64Bit ||
-                X86::isOffsetSuitableForCodeModel(Disp, M,
-                                                  AM.hasSymbolicDisplacement()))
-              AM.Disp = Disp;
-            else
+            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+            if (FoldOffsetIntoAddress(Disp, AM))
               Reg = N.getNode()->getOperand(0);
           } else {
             Reg = N.getNode()->getOperand(0);
@@ -949,19 +963,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     if (CurDAG->isBaseWithConstantOffset(N)) {
       X86ISelAddressMode Backup = AM;
       ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
-      uint64_t Offset = CN->getSExtValue();
 
       // Start with the LHS as an addr mode.
       if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
-          // Address could not have picked a GV address for the displacement.
-          AM.GV == NULL &&
-          // On x86-64, the resultant disp must fit in 32-bits.
-          (!is64Bit ||
-           X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
-                                             AM.hasSymbolicDisplacement()))) {
-        AM.Disp += Offset;
+          !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
         return false;
-      }
       AM = Backup;
     }
     break;
@@ -1351,7 +1357,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
 
   bool isInc = false, isDec = false, isSub = false, isCN = false;
   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
-  if (CN) {
+  if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
     isCN = true;
     int64_t CNVal = CN->getSExtValue();
     if (CNVal == 1)
@@ -1371,6 +1377,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
     Val = Val.getOperand(1);
   }
 
+  DebugLoc dl = Node->getDebugLoc();
   unsigned Opc = 0;
   switch (NVT.getSimpleVT().SimpleTy) {
   default: return 0;
@@ -1462,7 +1469,6 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
     break;
   }
 
-  DebugLoc dl = Node->getDebugLoc();
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -1579,7 +1585,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
   
   bool isCN = false;
   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
-  if (CN) {
+  if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
     isCN = true;
     Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
   }
@@ -1612,16 +1618,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
         Opc = AtomicOpcTbl[Op][I32];
       break;
     case MVT::i64:
+      Opc = AtomicOpcTbl[Op][I64];
       if (isCN) {
         if (immSext8(Val.getNode()))
           Opc = AtomicOpcTbl[Op][SextConstantI64];
         else if (i64immSExt32(Val.getNode()))
           Opc = AtomicOpcTbl[Op][ConstantI64];
-      } else
-        Opc = AtomicOpcTbl[Op][I64];
+      }
       break;
   }
   
+  assert(Opc != 0 && "Invalid arith lock transform!");
+
   DebugLoc dl = Node->getDebugLoc();
   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                  dl, NVT), 0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 294a6a74cc77..5096d9ae2edf 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     // Setup Windows compiler runtime calls.
     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+    setLibcallName(RTLIB::SREM_I64, "_allrem");
+    setLibcallName(RTLIB::UREM_I64, "_aullrem");
+    setLibcallName(RTLIB::MUL_I64, "_allmul");
     setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
     setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
   }
@@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
   }
 
+  // We don't support FMA.
+  setOperationAction(ISD::FMA, MVT::f64, Expand);
+  setOperationAction(ISD::FMA, MVT::f32, Expand);
+
   // Long double always uses X87.
   if (!UseSoftFloat) {
     addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
@@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       setOperationAction(ISD::FSIN           , MVT::f80  , Expand);
       setOperationAction(ISD::FCOS           , MVT::f80  , Expand);
     }
+
+    setOperationAction(ISD::FMA, MVT::f80, Expand);
   }
 
   // Always use a library call for pow.
@@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
 
     setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
-    setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
     setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
     setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
 
@@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
     setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
 
-    // Custom lower build_vector, vector_shuffle, scalar_to_vector,
-    // insert_vector_elt extract_subvector and extract_vector_elt for
-    // 256-bit types.
-    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         ++i) {
-      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-      // Do not attempt to custom lower non-256-bit vectors
-      if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
-          || (MVT(VT).getSizeInBits() < 256))
-        continue;
-      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
-      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
-      setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-      setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
-    }
-    // Custom-lower insert_subvector and extract_subvector based on
-    // the result type.
+    // Custom lower several nodes for 256-bit types.
     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         ++i) {
-      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-      // Do not attempt to custom lower non-256-bit vectors
-      if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+                  i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+      EVT VT = SVT;
+
+      // Extract subvector is special because the value type
+      // (result) is 128-bit but the source is 256-bit wide.
+      if (VT.is128BitVector())
+        setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+      // Do not attempt to custom lower other non-256-bit vectors
+      if (!VT.is256BitVector())
         continue;
 
-      if (MVT(VT).getSizeInBits() == 128) {
-        setOperationAction(ISD::EXTRACT_SUBVECTOR,  VT, Custom);
-      }
-      else if (MVT(VT).getSizeInBits() == 256) {
-        setOperationAction(ISD::INSERT_SUBVECTOR,  VT, Custom);
-      }
+      setOperationAction(ISD::BUILD_VECTOR,       SVT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE,     SVT, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  SVT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+      setOperationAction(ISD::SCALAR_TO_VECTOR,   SVT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR,   SVT, Custom);
     }
 
     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
-    // Don't promote loads because we need them for VPERM vector index versions.
+    for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+      EVT VT = SVT;
 
-    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-         VT++) {
-      if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
-          || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
+      // Do not attempt to promote non-256-bit vectors
+      if (!VT.is256BitVector())
         continue;
-      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v4i64);
-      //setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
-      //AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v4i64);
-      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
+
+      setOperationAction(ISD::AND,    SVT, Promote);
+      AddPromotedToType (ISD::AND,    SVT, MVT::v4i64);
+      setOperationAction(ISD::OR,     SVT, Promote);
+      AddPromotedToType (ISD::OR,     SVT, MVT::v4i64);
+      setOperationAction(ISD::XOR,    SVT, Promote);
+      AddPromotedToType (ISD::XOR,    SVT, MVT::v4i64);
+      setOperationAction(ISD::LOAD,   SVT, Promote);
+      AddPromotedToType (ISD::LOAD,   SVT, MVT::v4i64);
+      setOperationAction(ISD::SELECT, SVT, Promote);
+      AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
     }
   }
 
+  // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
+  // of this type with custom code.
+  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
@@ -1511,20 +1517,15 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
     // If this is a call to a function that returns an fp value on the floating
     // point stack, we must guarantee the the value is popped from the stack, so
     // a CopyFromReg is not good enough - the copy instruction may be eliminated
-    // if the return value is not used. We use the FpGET_ST0 instructions
+    // if the return value is not used. We use the FpPOP_RETVAL instruction
     // instead.
     if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
       // If we prefer to use the value in xmm registers, copy it out as f80 and
       // use a truncate to move it from fp stack reg to xmm reg.
       if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
-      bool isST0 = VA.getLocReg() == X86::ST0;
-      unsigned Opc = 0;
-      if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
-      if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
-      if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
       SDValue Ops[] = { Chain, InFlag };
-      Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
-                                         Ops, 2), 1);
+      Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+                                         MVT::Other, MVT::Glue, Ops, 2), 1);
       Val = Chain.getValue(0);
 
       // Round the f80 to the right size, which also moves it to the appropriate
@@ -1898,7 +1899,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   }
 
   // Some CCs need callee pop.
-  if (Subtarget->IsCalleePop(isVarArg, CallConv)) {
+  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
   } else {
     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -2271,6 +2272,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     const GlobalValue *GV = G->getGlobal();
     if (!GV->hasDLLImportLinkage()) {
       unsigned char OpFlags = 0;
+      bool ExtraLoad = false;
+      unsigned WrapperKind = ISD::DELETED_NODE;
 
       // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
       // external symbols most go through the PLT in PIC mode.  If the symbol
@@ -2288,10 +2291,28 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         // unless we're building with the leopard linker or later, which
         // automatically synthesizes these stubs.
         OpFlags = X86II::MO_DARWIN_STUB;
+      } else if (Subtarget->isPICStyleRIPRel() &&
+                 isa<Function>(GV) &&
+                 cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+        // If the function is marked as non-lazy, generate an indirect call
+        // which loads from the GOT directly. This avoids runtime overhead
+        // at the cost of eager binding (and one extra byte of encoding).
+        OpFlags = X86II::MO_GOTPCREL;
+        WrapperKind = X86ISD::WrapperRIP;
+        ExtraLoad = true;
       }
 
       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
                                           G->getOffset(), OpFlags);
+
+      // Add the wrapper selected above, if any.
+      if (WrapperKind != ISD::DELETED_NODE)
+        Callee = DAG.getNode(WrapperKind, dl, getPointerTy(), Callee);
+      // Add extra indirection if needed.
+      if (ExtraLoad)
+        Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+                             MachinePointerInfo::getGOT(),
+                             false, false, 0);
     }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     unsigned char OpFlags = 0;
@@ -2363,7 +2384,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   // Create the CALLSEQ_END node.
   unsigned NumBytesForCalleeToPush;
-  if (Subtarget->IsCalleePop(isVarArg, CallConv))
+  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
   else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
     // If this is a call to a struct-return function, the callee
@@ -2485,6 +2506,10 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
     if (!FINode)
       return false;
     FI = FINode->getIndex();
+  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+    FI = FINode->getIndex();
+    Bytes = Flags.getByValSize();
   } else
     return false;
 
@@ -2536,6 +2561,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   if (isCalleeStructRet || isCallerStructRet)
     return false;
 
+  // An stdcall caller is expected to clean up its arguments; the callee
+  // isn't going to do that.
+  if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+    return false;
+
   // Do not sibcall optimize vararg calls unless all arguments are passed via
   // registers.
   if (isVarArg && !Outs.empty()) {
@@ -2672,11 +2702,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     }
   }
 
-  // An stdcall caller is expected to clean up its arguments; the callee
-  // isn't going to do that.
-  if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
-    return false;
-
   return true;
 }
 
@@ -2856,6 +2881,29 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
   return false;
 }
 
+/// isCalleePop - Return true when a function with the given calling
+/// convention is required to pop its own arguments.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+                      bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+  // Vararg functions are never callee-pop.
+  if (IsVarArg)
+    return false;
+
+  switch (CallingConv) {
+  // The X86_* conventions are callee-pop only outside 64-bit mode.
+  case CallingConv::X86_StdCall:
+  case CallingConv::X86_FastCall:
+  case CallingConv::X86_ThisCall:
+    return !is64Bit;
+  // Fast and GHC are callee-pop only when tail call optimization is on.
+  case CallingConv::Fast:
+  case CallingConv::GHC:
+    return TailCallOpt;
+  default:
+    return false;
+  }
+}
+
 /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
 /// specific condition code, returning the condition code and the LHS/RHS of the
 /// comparison to make.
@@ -3790,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
 }
 
 /// getOnesVector - Returns a vector of specified type with all bits set.
-///
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
+  assert((VT.is128BitVector() || VT.is256BitVector())
+         && "Expected a 128-bit or 256-bit vector type");
 
-  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
-  // type.  This ensures they get CSE'd.
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
   SDValue Vec;
-  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  if (VT.is256BitVector()) {
+    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+  } else
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
   return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
-
 /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
 /// that point to V2 points to its first element.
 static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
@@ -4417,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return ConcatVectors(Lower, Upper, DAG);
   }
 
-  // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
-  // All one's are handled with pcmpeqd. In AVX, zero's are handled with
-  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
-  // is present, so AllOnes is ignored.
+  // All zero's:
+  //  - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+  // All one's:
+  //  - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
-      (Op.getValueType().getSizeInBits() != 256 &&
-       ISD::isBuildVectorAllOnes(Op.getNode()))) {
-    // Canonicalize this to <4 x i32> (SSE) to
+      ISD::isBuildVectorAllOnes(Op.getNode())) {
+    // Canonicalize this to <4 x i32> or <8 x i32> (SSE) to
     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
     // eliminated on x86-32 hosts.
-    if (Op.getValueType() == MVT::v4i32)
+    if (Op.getValueType() == MVT::v4i32 ||
+        Op.getValueType() == MVT::v8i32)
       return Op;
 
     if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -8874,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Lower SHL with variable shift amount.
-  // Cannot lower SHL without SSE4.1 or later.
-  if (!Subtarget->hasSSE41()) return SDValue();
+  // Cannot lower SHL without SSE2 or later.
+  if (!Subtarget->hasSSE2()) return SDValue();
 
   if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
     Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -9022,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
   return Sum;
 }
 
+/// LowerSIGN_EXTEND_INREG - Custom lower a vector SIGN_EXTEND_INREG into a
+/// shift-left followed by an arithmetic shift-right by the same amount.
+/// Only v4i32 and v8i16 with SSE2 are handled; every other case (including
+/// v2i64, which has no arithmetic right shift selected here) returns a null
+/// SDValue.
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  SDNode *Node = Op.getNode();
+  EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+  EVT VT = Node->getValueType(0);
+
+  if (!Subtarget->hasSSE2() || !VT.isVector())
+    return SDValue();
+
+  // Both shifts move by the number of bits that lie above the narrow type.
+  unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+                      ExtraVT.getScalarType().getSizeInBits();
+  SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+  unsigned SHLIntrinsicsID = 0;
+  unsigned SRAIntrinsicsID = 0;
+  switch (VT.getSimpleVT().SimpleTy) {
+    default:
+      // This includes v2i64: no arithmetic right shift intrinsic is selected
+      // for 64-bit elements here, and emitting only the left shift would
+      // return an unextended value.
+      return SDValue();
+    case MVT::v4i32:
+      SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+      SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+      break;
+    case MVT::v8i16:
+      SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+      SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+      break;
+  }
+
+  // Move the sign bit of the narrow type into the top bit of each element.
+  SDValue Shl = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                            DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+                            Node->getOperand(0), ShAmt);
+
+  // Shift back arithmetically to replicate that sign bit through the upper
+  // bits. This is required for 1-bit sources too: without it a set bit would
+  // yield only the top bit set rather than all ones.
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                     DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+                     Shl, ShAmt);
+}
+
+
 SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
   DebugLoc dl = Op.getDebugLoc();
 
-  if (!Subtarget->hasSSE2()) {
+  // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+  // There isn't any reason to disable it if the target processor supports it.
+  if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
     SDValue Chain = Op.getOperand(0);
-    SDValue Zero = DAG.getConstant(0,
-                                   Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+    SDValue Zero = DAG.getConstant(0, MVT::i32);
     SDValue Ops[] = {
       DAG.getRegister(X86::ESP, MVT::i32), // Base
       DAG.getTargetConstant(1, MVT::i8),   // Scale
@@ -9183,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
 SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
+  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op,DAG);
   case ISD::MEMBARRIER:         return LowerMEMBARRIER(Op,DAG);
   case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
   case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
@@ -9281,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   default:
     assert(false && "Do not know how to custom type legalize this operation!");
     return;
+  case ISD::SIGN_EXTEND_INREG:
   case ISD::ADDC:
   case ISD::ADDE:
   case ISD::SUBC:
@@ -9415,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PINSRB:             return "X86ISD::PINSRB";
   case X86ISD::PINSRW:             return "X86ISD::PINSRW";
   case X86ISD::PSHUFB:             return "X86ISD::PSHUFB";
-  case X86ISD::PANDN:              return "X86ISD::PANDN";
+  case X86ISD::ANDNP:              return "X86ISD::ANDNP";
   case X86ISD::PSIGNB:             return "X86ISD::PSIGNB";
   case X86ISD::PSIGNW:             return "X86ISD::PSIGNW";
   case X86ISD::PSIGND:             return "X86ISD::PSIGND";
@@ -11766,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
   if (R.getNode())
     return R;
 
-  // Want to form PANDN nodes, in the hopes of then easily combining them with
-  // OR and AND nodes to form PBLEND/PSIGN.
+  // Want to form ANDNP nodes:
+  // 1) In the hopes of then easily combining them with OR and AND nodes
+  //    to form PBLEND/PSIGN.
+  // 2) To match ANDN packed intrinsics
   EVT VT = N->getValueType(0);
-  if (VT != MVT::v2i64)
+  if (VT != MVT::v2i64 && VT != MVT::v4i64)
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
@@ -11779,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
   // Check LHS for vnot
   if (N0.getOpcode() == ISD::XOR &&
       ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
-    return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
 
   // Check RHS for vnot
   if (N1.getOpcode() == ISD::XOR &&
       ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
-    return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+    return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
 
   return SDValue();
 }
@@ -11810,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
   if (Subtarget->hasSSSE3()) {
     if (VT == MVT::v2i64) {
       // Canonicalize pandn to RHS
-      if (N0.getOpcode() == X86ISD::PANDN)
+      if (N0.getOpcode() == X86ISD::ANDNP)
         std::swap(N0, N1);
       // or (and (m, x), (pandn m, y))
-      if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+      if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
         SDValue Mask = N1.getOperand(0);
         SDValue X    = N1.getOperand(1);
         SDValue Y;
@@ -11822,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
         if (N0.getOperand(1) == Mask)
           Y = N0.getOperand(0);
 
-        // Check to see if the mask appeared in both the AND and PANDN and
+        // Bail out unless the ANDNP mask is also an operand of the AND.
         if (!Y.getNode())
           return SDValue();
 
@@ -12166,8 +12276,8 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
-  DebugLoc dl = N->getDebugLoc();
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+                                        const X86TargetLowering *XTLI) {
   SDValue Op0 = N->getOperand(0);
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
@@ -12178,7 +12288,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86T
         ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
         !XTLI->getSubtarget()->is64Bit() &&
         !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
-      SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+      SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+                                          Ld->getChain(), Op0, DAG);
       DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
       return FILDChain;
     }
@@ -12549,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
     case 'y':
     case 'x':
     case 'Y':
+    case 'l':
       return C_RegisterClass;
     case 'a':
     case 'b':
@@ -12832,60 +12944,6 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
-std::vector<unsigned> X86TargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const {
-  if (Constraint.size() == 1) {
-    // FIXME: not handling fp-stack yet!
-    switch (Constraint[0]) {      // GCC X86 Constraint Letters
-    default: break;  // Unknown constraint letter
-    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
-      if (Subtarget->is64Bit()) {
-        if (VT == MVT::i32)
-          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
-                                       X86::ESI, X86::EDI, X86::R8D, X86::R9D,
-                                       X86::R10D,X86::R11D,X86::R12D,
-                                       X86::R13D,X86::R14D,X86::R15D,
-                                       X86::EBP, X86::ESP, 0);
-        else if (VT == MVT::i16)
-          return make_vector<unsigned>(X86::AX,  X86::DX,  X86::CX, X86::BX,
-                                       X86::SI,  X86::DI,  X86::R8W,X86::R9W,
-                                       X86::R10W,X86::R11W,X86::R12W,
-                                       X86::R13W,X86::R14W,X86::R15W,
-                                       X86::BP,  X86::SP, 0);
-        else if (VT == MVT::i8)
-          return make_vector<unsigned>(X86::AL,  X86::DL,  X86::CL, X86::BL,
-                                       X86::SIL, X86::DIL, X86::R8B,X86::R9B,
-                                       X86::R10B,X86::R11B,X86::R12B,
-                                       X86::R13B,X86::R14B,X86::R15B,
-                                       X86::BPL, X86::SPL, 0);
-
-        else if (VT == MVT::i64)
-          return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
-                                       X86::RSI, X86::RDI, X86::R8,  X86::R9,
-                                       X86::R10, X86::R11, X86::R12,
-                                       X86::R13, X86::R14, X86::R15,
-                                       X86::RBP, X86::RSP, 0);
-
-        break;
-      }
-      // 32-bit fallthrough
-    case 'Q':   // Q_REGS
-      if (VT == MVT::i32)
-        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
-      else if (VT == MVT::i16)
-        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
-      else if (VT == MVT::i8)
-        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
-      else if (VT == MVT::i64)
-        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
-      break;
-    }
-  }
-
-  return std::vector<unsigned>();
-}
-
 std::pair<unsigned, const TargetRegisterClass*>
 X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const {
@@ -12895,9 +12953,35 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
     // GCC Constraint Letters
     switch (Constraint[0]) {
     default: break;
+      // TODO: Slight differences here in allocation order and leaving
+      // RIP in the class. Do they matter any more here than they do
+      // in the normal allocation?
+    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+      if (Subtarget->is64Bit()) {
+        if (VT == MVT::i32 || VT == MVT::f32)
+          return std::make_pair(0U, X86::GR32RegisterClass);
+        else if (VT == MVT::i16)
+          return std::make_pair(0U, X86::GR16RegisterClass);
+        else if (VT == MVT::i8 || VT == MVT::i1)
+          return std::make_pair(0U, X86::GR8RegisterClass);
+        else if (VT == MVT::i64 || VT == MVT::f64)
+          return std::make_pair(0U, X86::GR64RegisterClass);
+        break;
+      }
+      // 32-bit fallthrough
+    case 'Q':   // Q_REGS
+      if (VT == MVT::i32 || VT == MVT::f32)
+        return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
+      else if (VT == MVT::i16)
+        return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
+      else if (VT == MVT::i8 || VT == MVT::i1)
+        return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
+      else if (VT == MVT::i64)
+        return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
+      break;
     case 'r':   // GENERAL_REGS
     case 'l':   // INDEX_REGS
-      if (VT == MVT::i8)
+      if (VT == MVT::i8 || VT == MVT::i1)
         return std::make_pair(0U, X86::GR8RegisterClass);
       if (VT == MVT::i16)
         return std::make_pair(0U, X86::GR16RegisterClass);
@@ -12905,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
         return std::make_pair(0U, X86::GR32RegisterClass);
       return std::make_pair(0U, X86::GR64RegisterClass);
     case 'R':   // LEGACY_REGS
-      if (VT == MVT::i8)
+      if (VT == MVT::i8 || VT == MVT::i1)
         return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
       if (VT == MVT::i16)
         return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d61a1252304a..b6036782b865 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -169,8 +169,8 @@ namespace llvm {
       /// PSHUFB - Shuffle 16 8-bit values within a vector.
       PSHUFB,
 
-      /// PANDN - and with not'd value.
-      PANDN,
+      /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+      ANDNP,
 
       /// PSIGNB/W/D - Copy integer sign.
       PSIGNB, PSIGNW, PSIGND,
@@ -466,6 +466,12 @@ namespace llvm {
     /// fit into displacement field of the instruction.
     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                       bool hasSymbolicDisplacement = true);
+
+
+    /// isCalleePop - Determines whether the callee is required to pop its
+    /// own arguments. Callee pop is necessary to support tail calls.
+    bool isCalleePop(CallingConv::ID CallingConv,
+                     bool is64Bit, bool IsVarArg, bool TailCallOpt);
   }
 
   //===--------------------------------------------------------------------===//
@@ -590,10 +596,6 @@ namespace llvm {
     virtual ConstraintWeight getSingleConstraintMatchWeight(
       AsmOperandInfo &info, const char *constraint) const;
 
-    std::vector<unsigned>
-      getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                        EVT VT) const;
-
     virtual const char *LowerXConstraint(EVT ConstraintVT) const;
 
     /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -823,6 +825,7 @@ namespace llvm {
     SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
 
     // Utility functions to help LowerVECTOR_SHUFFLE
     SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 1ea8071053e9..0245e5c09644 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -150,11 +150,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
   MachineInstr *MI = MIB;
   MachineFunction &MF = *MI->getParent()->getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
-  const TargetInstrDesc &TID = MI->getDesc();
+  const MCInstrDesc &MCID = MI->getDesc();
   unsigned Flags = 0;
-  if (TID.mayLoad())
+  if (MCID.mayLoad())
     Flags |= MachineMemOperand::MOLoad;
-  if (TID.mayStore())
+  if (MCID.mayStore())
     Flags |= MachineMemOperand::MOStore;
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 33534cd82bdb..adcc747eb4b8 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1368,6 +1368,11 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
 
 
 // (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
 def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
 def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index b506f5e0b81a..7cb870fabd62 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -112,31 +112,8 @@ let usesCustomInserter = 1 in {  // Expanded after instruction selection.
 // a pattern) and the FPI instruction should have emission info (e.g. opcode
 // encoding and asm printing info).
 
-// Pseudo Instructions for FP stack return values.
-def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
-def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
-def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
-
-// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when
-// there are two values live out on the stack from a call or inlineasm.  This
-// magic is handled by the stackifier.  It is not valid to emit FpGET_ST1* and
-// then FpGET_ST0*.  In addition, it is invalid for any FP-using operations to
-// occur between them.
-def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
-def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
-def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
-
-let Defs = [ST0] in {
-def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
-def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
-def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
-}
-
-let Defs = [ST1] in {
-def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR
-def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
-def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
-}
+// Pseudo Instruction for FP stack return values.
+def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
 
 // FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
 // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
@@ -147,19 +124,6 @@ class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
 class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
   FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
 
-// Register copies.  Just copies, the shortening ones do not truncate.
-let neverHasSideEffects = 1 in {
-  def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; 
-  def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; 
-  def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; 
-  def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; 
-  def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; 
-  def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; 
-  def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; 
-  def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; 
-  def MOV_Fp8080 : FpI_  <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; 
-}
-
 // Factoring for arithmetic.
 multiclass FPBinary_rr<SDNode OpNode> {
 // Register op register -> register
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 7daa26492274..6d89bcc29e7b 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -460,6 +460,11 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
 class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag>pattern>
       : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+        OpSize, Requires<[HasCLMUL]>;
+
+class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+                  list<dag>pattern>
+      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
         OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
 
 // FMA3 Instruction Templates
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7c9a9f7e8c50..b00109c9fa4d 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -46,8 +46,8 @@ def X86cmpsd   : SDNode<"X86ISD::FSETCCsd",    SDTX86Cmpsd>;
 def X86pshufb  : SDNode<"X86ISD::PSHUFB",
                  SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>]>>;
-def X86pandn   : SDNode<"X86ISD::PANDN", 
-                 SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+def X86andnp   : SDNode<"X86ISD::ANDNP",
+                 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>]>>;
 def X86psignb  : SDNode<"X86ISD::PSIGNB", 
                  SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
@@ -168,11 +168,13 @@ def ssmem : Operand<v4f32> {
   let PrintMethod = "printf32mem";
   let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 def sdmem : Operand<v2f64> {
   let PrintMethod = "printf64mem";
   let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 
 //===----------------------------------------------------------------------===//
@@ -301,6 +303,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
 
 // 256-bit bitconvert pattern fragments
 def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
 
 def vzmovl_v2i64 : PatFrag<(ops node:$src),
                            (bitconvert (v2i64 (X86vzmovl
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e2016eb2d6fb..55b5835f52a7 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -13,7 +13,6 @@
 
 #include "X86InstrInfo.h"
 #include "X86.h"
-#include "X86GenInstrInfo.inc"
 #include "X86InstrBuilder.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
@@ -36,6 +35,9 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include <limits>
 
+#define GET_INSTRINFO_CTOR
+#include "X86GenInstrInfo.inc"
+
 using namespace llvm;
 
 static cl::opt<bool>
@@ -52,7 +54,12 @@ ReMatPICStubLoad("remat-pic-stub-load",
                  cl::init(false), cl::Hidden);
 
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
-  : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
+  : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+                     ? X86::ADJCALLSTACKDOWN64
+                     : X86::ADJCALLSTACKDOWN32),
+                    (tm.getSubtarget<X86Subtarget>().is64Bit()
+                     ? X86::ADJCALLSTACKUP64
+                     : X86::ADJCALLSTACKUP32)),
     TM(tm), RI(tm, *this) {
   enum {
     TB_NOT_REVERSABLE = 1U << 31,
@@ -293,12 +300,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOVAPDrr,    X86::MOVAPDmr, 0, 16 },
     { X86::MOVAPSrr,    X86::MOVAPSmr, 0, 16 },
     { X86::MOVDQArr,    X86::MOVDQAmr, 0, 16 },
+    { X86::VMOVAPDYrr,  X86::VMOVAPDYmr, 0, 32 },
+    { X86::VMOVAPSYrr,  X86::VMOVAPSYmr, 0, 32 },
+    { X86::VMOVDQAYrr,  X86::VMOVDQAYmr, 0, 32 },
     { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
     { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
     { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
     { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0, 0 },
     { X86::MOVUPDrr,    X86::MOVUPDmr, 0, 0 },
     { X86::MOVUPSrr,    X86::MOVUPSmr, 0, 0 },
+    { X86::VMOVUPDYrr,  X86::VMOVUPDYmr, 0, 0 },
+    { X86::VMOVUPSYrr,  X86::VMOVUPSYmr, 0, 0 },
     { X86::MUL16r,      X86::MUL16m, 1, 0 },
     { X86::MUL32r,      X86::MUL32m, 1, 0 },
     { X86::MUL64r,      X86::MUL64m, 1, 0 },
@@ -403,10 +415,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV8rr,          X86::MOV8rm, 0 },
     { X86::MOVAPDrr,        X86::MOVAPDrm, 16 },
     { X86::MOVAPSrr,        X86::MOVAPSrm, 16 },
+    { X86::VMOVAPDYrr,      X86::VMOVAPDYrm, 32 },
+    { X86::VMOVAPSYrr,      X86::VMOVAPSYrm, 32 },
     { X86::MOVDDUPrr,       X86::MOVDDUPrm, 0 },
     { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm, 0 },
     { X86::MOVDI2SSrr,      X86::MOVDI2SSrm, 0 },
     { X86::MOVDQArr,        X86::MOVDQArm, 16 },
+    { X86::VMOVDQAYrr,      X86::VMOVDQAYrm, 32 },
     { X86::MOVSHDUPrr,      X86::MOVSHDUPrm, 16 },
     { X86::MOVSLDUPrr,      X86::MOVSLDUPrm, 16 },
     { X86::MOVSX16rr8,      X86::MOVSX16rm8, 0 },
@@ -417,6 +432,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
     { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
     { X86::MOVUPSrr,        X86::MOVUPSrm, 0 },
+    { X86::VMOVUPDYrr,      X86::VMOVUPDYrm, 0 },
+    { X86::VMOVUPSYrr,      X86::VMOVUPSYrm, 0 },
     { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
     { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
     { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
@@ -779,6 +796,9 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::MOVAPSrm:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
+  case X86::VMOVAPSYrm:
+  case X86::VMOVAPDYrm:
+  case X86::VMOVDQAYrm:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
     return true;
@@ -800,6 +820,9 @@ static bool isFrameStoreOpcode(int Opcode) {
   case X86::MOVAPSmr:
   case X86::MOVAPDmr:
   case X86::MOVDQAmr:
+  case X86::VMOVAPSYmr:
+  case X86::VMOVAPDYmr:
+  case X86::VMOVDQAYmr:
   case X86::MMX_MOVD64mr:
   case X86::MMX_MOVQ64mr:
   case X86::MMX_MOVNTQmr:
@@ -918,6 +941,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
     case X86::MOVUPSrm:
     case X86::MOVAPDrm:
     case X86::MOVDQArm:
+    case X86::VMOVAPSYrm:
+    case X86::VMOVUPSYrm:
+    case X86::VMOVAPDYrm:
+    case X86::VMOVDQAYrm:
     case X86::MMX_MOVD64rm:
     case X86::MMX_MOVQ64rm:
     case X86::FsMOVAPSrm:
@@ -1689,13 +1716,13 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
 }
 
 bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isTerminator()) return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isTerminator()) return false;
 
   // Conditional branch is a special case.
-  if (TID.isBranch() && !TID.isBarrier())
+  if (MCID.isBranch() && !MCID.isBarrier())
     return true;
-  if (!TID.isPredicable())
+  if (!MCID.isPredicable())
     return true;
   return !isPredicated(MI);
 }
@@ -1789,7 +1816,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
           .addMBB(UnCondBrIter->getOperand(0).getMBB());
         BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
           .addMBB(TargetBB);
-        MBB.addSuccessor(TargetBB);
 
         OldInst->eraseFromParent();
         UnCondBrIter->eraseFromParent();
@@ -1968,6 +1994,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       Opc = X86::MOV8rr;
   } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
     Opc = X86::MOVAPSrr;
+  else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+    Opc = X86::VMOVAPSYrr;
   else if (X86::VR64RegClass.contains(DestReg, SrcReg))
     Opc = X86::MMX_MOVQ64rr;
   else
@@ -2057,6 +2085,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
       return load ? X86::MOVAPSrm : X86::MOVAPSmr;
     else
       return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+  case 32:
+    assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+    // Aligned stack slot: use the aligned 256-bit load/store, else unaligned.
+    if (isStackAligned)
+      return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+    else
+      return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
   }
 }
 
@@ -2083,7 +2118,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   const MachineFunction &MF = *MBB.getParent();
   assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
          "Stack slot too small for store");
-  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+  bool isAligned = RI.canRealignStack(MF) || (RC->getSize() == 32 ? 32u : 16u)
+    <= TM.getFrameLowering()->getStackAlignment();
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
   DebugLoc DL = MBB.findDebugLoc(MI);
   addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -2115,7 +2151,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
-  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+  bool isAligned = RI.canRealignStack(MF) || (RC->getSize() == 32 ? 32u : 16u)
+    <= TM.getFrameLowering()->getStackAlignment();
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL = MBB.findDebugLoc(MI);
   addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -2224,7 +2261,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   bool isTwoAddrFold = false;
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
-    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
 
   // FIXME: AsmPrinter doesn't know how to handle
   // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
@@ -2273,7 +2310,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
         return NULL;
       bool NarrowToMOV32rm = false;
       if (Size) {
-        unsigned RCSize =  MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+        unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
         if (Size < RCSize) {
           // Check if it's safe to fold the load. If the size of the object is
           // narrower than the load width, then it's not.
@@ -2542,7 +2579,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
   unsigned Opc = MI->getOpcode();
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
-    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
 
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
@@ -2588,9 +2625,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
     return false;
   UnfoldStore &= FoldedStore;
 
-  const TargetInstrDesc &TID = get(Opc);
-  const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
+  const MCInstrDesc &MCID = get(Opc);
+  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
   if (!MI->hasOneMemOperand() &&
       RC == &X86::VR128RegClass &&
       !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
@@ -2632,7 +2668,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   }
 
   // Emit the data processing instruction.
-  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
+  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
   MachineInstrBuilder MIB(DataMI);
 
   if (FoldedStore)
@@ -2685,7 +2721,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
 
   // Emit the store instruction.
   if (UnfoldStore) {
-    const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
+    const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
     std::pair<MachineInstr::mmo_iterator,
               MachineInstr::mmo_iterator> MMOs =
       MF.extractStoreMemRefs(MI->memoperands_begin(),
@@ -2710,9 +2746,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   unsigned Index = I->second.second & 0xf;
   bool FoldedLoad = I->second.second & (1 << 4);
   bool FoldedStore = I->second.second & (1 << 5);
-  const TargetInstrDesc &TID = get(Opc);
-  const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
-  unsigned NumDefs = TID.NumDefs;
+  const MCInstrDesc &MCID = get(Opc);
+  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+  unsigned NumDefs = MCID.NumDefs;
   std::vector<SDValue> AddrOps;
   std::vector<SDValue> BeforeOps;
   std::vector<SDValue> AfterOps;
@@ -2756,13 +2792,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   // Emit the data processing instruction.
   std::vector<EVT> VTs;
   const TargetRegisterClass *DstRC = 0;
-  if (TID.getNumDefs() > 0) {
-    DstRC = TID.OpInfo[0].getRegClass(&RI);
+  if (MCID.getNumDefs() > 0) {
+    DstRC = getRegClass(MCID, 0, &RI);
     VTs.push_back(*DstRC->vt_begin());
   }
   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
     EVT VT = N->getValueType(i);
-    if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
+    if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
       VTs.push_back(VT);
   }
   if (Load)
@@ -2845,6 +2881,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
+  case X86::VMOVAPSYrm:
+  case X86::VMOVUPSYrm:
+  case X86::VMOVAPDYrm:
+  case X86::VMOVDQAYrm:
+  case X86::VMOVDQUYrm:
     break;
   }
   switch (Opc2) {
@@ -2867,6 +2908,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
   case X86::MOVDQUrm:
+  case X86::VMOVAPSYrm:
+  case X86::VMOVUPSYrm:
+  case X86::VMOVAPDYrm:
+  case X86::VMOVDQAYrm:
+  case X86::VMOVDQUYrm:
     break;
   }
 
@@ -3045,6 +3091,13 @@ static const unsigned ReplaceableInstrs[][3] = {
   { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
   { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
   { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
+  // AVX 256-bit support
+  { X86::VMOVAPSYmr,   X86::VMOVAPDYmr,   X86::VMOVDQAYmr  },
+  { X86::VMOVAPSYrm,   X86::VMOVAPDYrm,   X86::VMOVDQAYrm  },
+  { X86::VMOVAPSYrr,   X86::VMOVAPDYrr,   X86::VMOVDQAYrr  },
+  { X86::VMOVUPSYmr,   X86::VMOVUPDYmr,   X86::VMOVDQUYmr  },
+  { X86::VMOVUPSYrm,   X86::VMOVUPDYrm,   X86::VMOVDQUYrm  },
+  { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr },
 };
 
 // FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d8950230d83d..5f2eba34ac45 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -19,6 +19,9 @@
 #include "X86RegisterInfo.h"
 #include "llvm/ADT/DenseMap.h"
 
+#define GET_INSTRINFO_HEADER
+#include "X86GenInstrInfo.inc"
+
 namespace llvm {
   class X86RegisterInfo;
   class X86TargetMachine;
@@ -611,7 +614,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
     isLeaMem(MI, Op);
 }
 
-class X86InstrInfo : public TargetInstrInfoImpl {
+class X86InstrInfo : public X86GenInstrInfo {
   X86TargetMachine &TM;
   const X86RegisterInfo RI;
 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 8cab80824688..7eb07b0a97bd 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -251,6 +251,7 @@ class X86MemOperand<string printMethod> : Operand<iPTR> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+let OperandType = "OPERAND_MEMORY" in {
 def opaque32mem : X86MemOperand<"printopaquemem">;
 def opaque48mem : X86MemOperand<"printopaquemem">;
 def opaque80mem : X86MemOperand<"printopaquemem">;
@@ -267,6 +268,7 @@ def f64mem  : X86MemOperand<"printf64mem">;
 def f80mem  : X86MemOperand<"printf80mem">;
 def f128mem : X86MemOperand<"printf128mem">;
 def f256mem : X86MemOperand<"printf256mem">;
+}
 
 // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
 // plain GR64, so that it doesn't potentially require a REX prefix.
@@ -274,6 +276,7 @@ def i8mem_NOREX : Operand<i64> {
   let PrintMethod = "printi8mem";
   let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 
 // GPRs available for tailcall.
@@ -287,6 +290,7 @@ def i32mem_TC : Operand<i32> {
   let PrintMethod = "printi32mem";
   let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 
 // Special i64mem for addresses of load folding tail calls. These are not
@@ -297,9 +301,11 @@ def i64mem_TC : Operand<i64> {
   let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
                        ptr_rc_tailcall, i32imm, i8imm);
   let ParserMatchClass = X86MemAsmOperand;
+  let OperandType = "OPERAND_MEMORY";
 }
 
-let ParserMatchClass = X86AbsMemAsmOperand,
+let OperandType = "OPERAND_PCREL",
+    ParserMatchClass = X86AbsMemAsmOperand,
     PrintMethod = "print_pcrel_imm" in {
 def i32imm_pcrel : Operand<i32>;
 def i16imm_pcrel : Operand<i16>;
@@ -317,6 +323,7 @@ def brtarget8 : Operand<OtherVT>;
 
 def SSECC : Operand<i8> {
   let PrintMethod = "printSSECC";
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 
 class ImmSExtAsmOperandClass : AsmOperandClass {
@@ -363,15 +370,18 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
 // 16-bits but only 8 bits are significant.
 def i16i8imm  : Operand<i16> {
   let ParserMatchClass = ImmSExti16i8AsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 // 32-bits but only 8 bits are significant.
 def i32i8imm  : Operand<i32> {
   let ParserMatchClass = ImmSExti32i8AsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 
 // 64-bits but only 32 bits are significant.
 def i64i32imm  : Operand<i64> {
   let ParserMatchClass = ImmSExti64i32AsmOperand;
+  let OperandType = "OPERAND_IMMEDIATE";
 }
 
 // 64-bits but only 32 bits are significant, and those bits are treated as being
@@ -438,8 +448,10 @@ def HasFMA3      : Predicate<"Subtarget->hasFMA3()">;
 def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;
 def FPStackf32   : Predicate<"!Subtarget->hasXMM()">;
 def FPStackf64   : Predicate<"!Subtarget->hasXMMInt()">;
-def In32BitMode  : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
-def In64BitMode  : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
+def In32BitMode  : Predicate<"!Subtarget->is64Bit()">,
+                             AssemblerPredicate<"!Mode64Bit">;
+def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
+                             AssemblerPredicate<"Mode64Bit">;
 def IsWin64      : Predicate<"Subtarget->isTargetWin64()">;
 def NotWin64     : Predicate<"!Subtarget->isTargetWin64()">;
 def SmallCode    : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -669,7 +681,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
 }
 
 let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
                      "push{q}\t$imm", []>;
 def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
                       "push{q}\t$imm", []>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b64c03a9b597..fe11d776804c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -512,6 +512,26 @@ defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
 defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
                                   VEX_4V, VEX_W;
 
+let Predicates = [HasAVX] in {
+  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+  def : Pat<(f32 (sint_to_fp GR32:$src)),
+            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+  def : Pat<(f32 (sint_to_fp GR64:$src)),
+            (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+  def : Pat<(f64 (sint_to_fp GR32:$src)),
+            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+  def : Pat<(f64 (sint_to_fp GR64:$src)),
+            (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                       "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
 defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -1473,83 +1493,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
 /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
 ///
 multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
-                                 SDNode OpNode, int HasPat = 0,
-                                 list<list<dag>> Pattern = []> {
+                                   SDNode OpNode> {
   let Pattern = []<dag> in {
     defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
          !strconcat(OpcodeStr, "ps"), f128mem,
-         !if(HasPat, Pattern[0], // rr
-                     [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
-                                                      VR128:$src2)))]),
-         !if(HasPat, Pattern[2], // rm
-                     [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
-                                               (memopv2i64 addr:$src2)))]), 0>,
-                                               VEX_4V;
+         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+                                   (memopv2i64 addr:$src2)))], 0>, VEX_4V;
 
     defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
          !strconcat(OpcodeStr, "pd"), f128mem,
-         !if(HasPat, Pattern[1], // rr
-                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
-                                               (bc_v2i64 (v2f64
-                                               VR128:$src2))))]),
-         !if(HasPat, Pattern[3], // rm
-                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
-                                               (memopv2i64 addr:$src2)))]), 0>,
-                                                               OpSize, VEX_4V;
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                   (bc_v2i64 (v2f64 VR128:$src2))))],
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                   (memopv2i64 addr:$src2)))], 0>,
+                                                   OpSize, VEX_4V;
   }
   let Constraints = "$src1 = $dst" in {
     defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
          !strconcat(OpcodeStr, "ps"), f128mem,
-         !if(HasPat, Pattern[0], // rr
-                     [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
-                                                      VR128:$src2)))]),
-         !if(HasPat, Pattern[2], // rm
-                     [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
-                                               (memopv2i64 addr:$src2)))])>, TB;
+         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+                                   (memopv2i64 addr:$src2)))]>, TB;
 
     defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
          !strconcat(OpcodeStr, "pd"), f128mem,
-         !if(HasPat, Pattern[1], // rr
-                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
-                                               (bc_v2i64 (v2f64
-                                               VR128:$src2))))]),
-         !if(HasPat, Pattern[3], // rm
-                     [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
-                                               (memopv2i64 addr:$src2)))])>,
-                                                                    TB, OpSize;
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                   (bc_v2i64 (v2f64 VR128:$src2))))],
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+                                   (memopv2i64 addr:$src2)))]>, TB, OpSize;
   }
 }
 
 /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
 ///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
+                                     SDNode OpNode> {
     defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
-          !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
+          !strconcat(OpcodeStr, "ps"), f256mem,
+          [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+          [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+                                    (memopv4i64 addr:$src2)))], 0>, VEX_4V;
 
     defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
-          !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
+          !strconcat(OpcodeStr, "pd"), f256mem,
+          [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+                                    (bc_v4i64 (v4f64 VR256:$src2))))],
+          [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+                                    (memopv4i64 addr:$src2)))], 0>,
+                                    OpSize, VEX_4V;
 }
 
 // AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
-defm VOR  : sse12_fp_packed_logical_y<0x56, "or">;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">;
-let isCommutable = 0 in
-  defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">;
+defm VAND  : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR   : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR  : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+let isCommutable = 0 in defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
 
 defm AND  : sse12_fp_packed_logical<0x54, "and", and>;
 defm OR   : sse12_fp_packed_logical<0x56, "or", or>;
 defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
 let isCommutable = 0 in
-  defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
-    // single r+r
-    [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
-    // double r+r
-    [],
-    // single r+m
-    [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
-    // double r+m
-    []]>;
+  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Arithmetic Instructions
@@ -1991,11 +1996,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
 
 // There is no AVX form for instructions below this point
 def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 "movnti\t{$src, $dst|$dst, $src}",
+                 "movnti{l}\t{$src, $dst|$dst, $src}",
                  [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
                TB, Requires<[HasSSE2]>;
 def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                     "movnti\t{$src, $dst|$dst, $src}",
+                     "movnti{q}\t{$src, $dst|$dst, $src}",
                      [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
                   TB, Requires<[HasSSE2]>;
 }
@@ -2006,13 +2011,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
 
 // Prefetch intrinsic.
 def PREFETCHT0   : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
-    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
 def PREFETCHT1   : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
-    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
 def PREFETCHT2   : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
-    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
 def PREFETCHNTA  : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
-    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
 
 // Load, store, and memory fence
 def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
@@ -2037,7 +2042,10 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
 }
 
 // The same as done above but for AVX. The 128-bit versions are the
-// same, but re-encoded. The 256-bit does not support PI version.
+// same, but re-encoded. The 256-bit does not support PI version, and
+// doesn't need it because on Sandy Bridge the register is set to zero
+// at the rename stage without using any execution unit, so SET0PSY
+// and SET0PDY can be used for vector int instructions without penalty.
 // FIXME: Change encoding to pseudo! This is blocked right now by the x86
 // JIT implementatioan, it does not expand the instructions below like
 // X86MCInstLower does.
@@ -2052,8 +2060,8 @@ def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
 def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                    [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
 let ExeDomain = SSEPackedInt in
-def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+def AVX_SET0PI  : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+                   [(set VR128:$dst, (v4i32 immAllZerosV))]>;
 }
 
 def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
@@ -2063,6 +2071,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
 def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
           (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction w/ something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+def : Pat<(extloadf32 addr:$src),
+          (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
+      Requires<[HasAVX, OptForSpeed]>;
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Load/Store XCSR register
 //===----------------------------------------------------------------------===//
@@ -2959,6 +2976,22 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
             (MOVZDI2PDIrm addr:$src)>;
 }
 
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+                (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
 //===---------------------------------------------------------------------===//
 // SSE2 - Move Quadword
 //===---------------------------------------------------------------------===//
@@ -3589,6 +3622,16 @@ let Predicates = [HasSSE2] in
  def : Pat<(fextend (loadf32 addr:$src)),
            (CVTSS2SDrm addr:$src)>;
 
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction w/ something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+let Predicates = [HasAVX] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+           (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
+                        addr:$src)>;
+
 // bit_convert
 let Predicates = [HasXMMInt] in {
   def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3625,6 +3668,19 @@ let Predicates = [HasXMMInt] in {
 
 let Predicates = [HasAVX] in {
   def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+  def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+  def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+  def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+  def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+  def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+  def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+  def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+  def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+  def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+  def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+  def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+  def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+  def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
 }
 
 // Move scalar to XMM zero-extended
@@ -3807,6 +3863,8 @@ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
           (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
 
 // Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
 let Predicates = [HasSSE1] in {
   def : Pat<(alignedloadv4i32 addr:$src),
             (MOVAPSrm addr:$src)>;
@@ -3835,8 +3893,9 @@ let Predicates = [HasSSE1] in {
             (MOVUPSmr addr:$dst, VR128:$src)>;
 }
 
-// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+// Use vmovaps/vmovups for AVX integer load/store.
 let Predicates = [HasAVX] in {
+  // 128-bit load/store
   def : Pat<(alignedloadv4i32 addr:$src),
             (VMOVAPSrm addr:$src)>;
   def : Pat<(loadv4i32 addr:$src),
@@ -3862,6 +3921,24 @@ let Predicates = [HasAVX] in {
             (VMOVUPSmr addr:$dst, VR128:$src)>;
   def : Pat<(store (v16i8 VR128:$src), addr:$dst),
             (VMOVUPSmr addr:$dst, VR128:$src)>;
+
+  // 256-bit load/store
+  def : Pat<(alignedloadv4i64 addr:$src),
+            (VMOVAPSYrm addr:$src)>;
+  def : Pat<(loadv4i64 addr:$src),
+            (VMOVUPSYrm addr:$src)>;
+  def : Pat<(alignedloadv8i32 addr:$src),
+            (VMOVAPSYrm addr:$src)>;
+  def : Pat<(loadv8i32 addr:$src),
+            (VMOVUPSYrm addr:$src)>;
+  def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
+            (VMOVAPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
+  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+            (VMOVUPSYmr addr:$dst, VR256:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -5160,33 +5237,52 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
 // CLMUL Instructions
 //===----------------------------------------------------------------------===//
 
-// Only the AVX version of CLMUL instructions are described here.
-
 // Carry-less Multiplication instructions
-def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let Constraints = "$src1 = $dst" in { // $src1 is tied to $dst
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           []>; // register source; empty pattern list
+
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           []>; // 128-bit memory source; empty pattern list
+}
+
+// AVX carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2, i8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>;
 
-def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
            (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
            []>;
 
-// Assembler Only
-multiclass avx_vpclmul<string asm> {
-  def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>;
-
-  def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>;
-}
-defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">;
-defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
-defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
-defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
+// Alias "[v]pclmul<asm>dq" to the rr/rm forms of (V)PCLMULQDQ, imm = immop.
+multiclass pclmul_alias<string asm, int immop> {
+  def : InstAlias<!strconcat("pclmul", asm, 
+                           "dq {$src, $dst|$dst, $src}"),
+                  (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+  def : InstAlias<!strconcat("pclmul", asm, 
+                             "dq {$src, $dst|$dst, $src}"),
+                  (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+  def : InstAlias<!strconcat("vpclmul", asm, 
+                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+  def : InstAlias<!strconcat("vpclmul", asm, 
+                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>; // [v]pclmulhqhqdq
+defm : pclmul_alias<"hqlq", 0x01>; // [v]pclmulhqlqdq
+defm : pclmul_alias<"lqhq", 0x10>; // [v]pclmullqhqdq
+defm : pclmul_alias<"lqlq", 0x00>; // [v]pclmullqlqdq
 
 //===----------------------------------------------------------------------===//
 // AVX Instructions
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index f73cff39e86d..31de878343ef 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -411,6 +411,8 @@ let Uses = [RDX, RAX, RCX] in
 let Defs = [RAX, RDI], Uses = [RDX, RDI] in
   def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
 
+def : InstAlias<"xstorerng", (XSTORE)>;
+
 let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
   def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
   def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 55aceba9f270..ce8ef495c001 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -18,26 +18,32 @@
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/raw_ostream.h"
+
 using namespace llvm;
 
 namespace {
 class X86MCCodeEmitter : public MCCodeEmitter {
   X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
   void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
-  const TargetMachine &TM;
-  const TargetInstrInfo &TII;
+  const MCInstrInfo &MCII;
+  const MCSubtargetInfo &STI;
   MCContext &Ctx;
-  bool Is64BitMode;
 public:
-  X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
-    : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
-    Is64BitMode = is64Bit;
+  X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+                   MCContext &ctx)
+    : MCII(mcii), STI(sti), Ctx(ctx) { // binds the reference members only
+  }
 
   ~X86MCCodeEmitter() {}
 
+  /// True when the subtarget's feature bits have X86::Mode64Bit set.
+  bool is64BitMode() const { // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+  }
+
   static unsigned GetX86RegNum(const MCOperand &MO) {
     return X86RegisterInfo::getX86RegNum(MO.getReg());
   }
@@ -111,7 +117,7 @@ public:
                          SmallVectorImpl<MCFixup> &Fixups) const;
 
   void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
-                           const MCInst &MI, const TargetInstrDesc &Desc,
+                           const MCInst &MI, const MCInstrDesc &Desc,
                            raw_ostream &OS) const;
 
   void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
@@ -119,23 +125,17 @@ public:
                                  raw_ostream &OS) const;
 
   void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
-                        const MCInst &MI, const TargetInstrDesc &Desc,
+                        const MCInst &MI, const MCInstrDesc &Desc,
                         raw_ostream &OS) const;
 };
 
 } // end anonymous namespace
 
 
-MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &,
-                                               TargetMachine &TM,
-                                               MCContext &Ctx) {
-  return new X86MCCodeEmitter(TM, Ctx, false);
-}
-
-MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
-                                               TargetMachine &TM,
-                                               MCContext &Ctx) {
-  return new X86MCCodeEmitter(TM, Ctx, true);
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+                                            const MCSubtargetInfo &STI,
+                                            MCContext &Ctx) {
+  return new X86MCCodeEmitter(MCII, STI, Ctx);
 }
 
 /// isDisp8 - Return true if this signed displacement fits in a 8-bit
@@ -245,7 +245,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
 
   // Handle %rip relative addressing.
   if (BaseReg == X86::RIP) {    // [disp32+RIP] in X86-64 mode
-    assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode");
+    assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
     assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
     EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
 
@@ -284,7 +284,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
       BaseRegNo != N86::ESP &&
       // If there is no base register and we're in 64-bit mode, we need a SIB
       // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
-      (!Is64BitMode || BaseReg != 0)) {
+      (!is64BitMode() || BaseReg != 0)) {
 
     if (BaseReg == 0) {          // [disp32]     in X86-32 mode
       EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
@@ -379,7 +379,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
 /// called VEX.
 void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
                                            int MemOperand, const MCInst &MI,
-                                           const TargetInstrDesc &Desc,
+                                           const MCInstrDesc &Desc,
                                            raw_ostream &OS) const {
   bool HasVEX_4V = false;
   if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
@@ -586,7 +586,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
 /// size, and 3) use of X86-64 extended registers.
 static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
-                                   const TargetInstrDesc &Desc) {
+                                   const MCInstrDesc &Desc) {
   unsigned REX = 0;
   if (TSFlags & X86II::REX_W)
     REX |= 1 << 3; // set REX.W
@@ -596,7 +596,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
   unsigned NumOps = MI.getNumOperands();
   // FIXME: MCInst should explicitize the two-addrness.
   bool isTwoAddr = NumOps > 1 &&
-                      Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+                      Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
 
   // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
   unsigned i = isTwoAddr ? 1 : 0;
@@ -713,7 +713,7 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
 /// Not present, it is -1.
 void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
                                         int MemOperand, const MCInst &MI,
-                                        const TargetInstrDesc &Desc,
+                                        const MCInstrDesc &Desc,
                                         raw_ostream &OS) const {
 
   // Emit the lock opcode prefix as needed.
@@ -729,7 +729,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
 
   // Emit the address size opcode prefix as needed.
   if ((TSFlags & X86II::AdSize) ||
-      (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+      (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
     EmitByte(0x67, CurByte, OS);
   
   // Emit the operand size opcode prefix as needed.
@@ -772,7 +772,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
 
   // Handle REX prefix.
   // FIXME: Can this come before F2 etc to simplify emission?
-  if (Is64BitMode) {
+  if (is64BitMode()) {
     if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
       EmitByte(0x40 | REX, CurByte, OS);
   }
@@ -803,7 +803,7 @@ void X86MCCodeEmitter::
 EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                   SmallVectorImpl<MCFixup> &Fixups) const {
   unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = TII.get(Opcode);
+  const MCInstrDesc &Desc = MCII.get(Opcode);
   uint64_t TSFlags = Desc.TSFlags;
 
   // Pseudo instructions don't get encoded.
@@ -814,9 +814,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   // FIXME: This should be handled during MCInst lowering.
   unsigned NumOps = Desc.getNumOperands();
   unsigned CurOp = 0;
-  if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1)
+  if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
     ++CurOp;
-  else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+  else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
     // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
     --NumOps;
 
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 793156ffce83..e38533555534 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,8 +16,8 @@
 #include "X86MCInstLower.h"
 #include "X86AsmPrinter.h"
 #include "X86COFFMachineModuleInfo.h"
-#include "X86MCAsmInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp
index 8f3dd3222489..37110382379e 100644
--- a/lib/Target/X86/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/X86MachObjectWriter.cpp
@@ -8,19 +8,541 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Object/MachOFormat.h"
+
 using namespace llvm;
+using namespace llvm::object;
 
 namespace {
 class X86MachObjectWriter : public MCMachObjectTargetWriter {
+  // Private relocation helpers. They are only declared in the class body;
+  // RecordRelocation() below dispatches to the X86 / X86_64 variants based
+  // on Writer->is64Bit().
+  void RecordScatteredRelocation(MachObjectWriter *Writer,
+                                 const MCAssembler &Asm,
+                                 const MCAsmLayout &Layout,
+                                 const MCFragment *Fragment,
+                                 const MCFixup &Fixup, MCValue Target,
+                                 unsigned Log2Size, uint64_t &FixedValue);
+
+  void RecordTLVPRelocation(MachObjectWriter *Writer,
+                            const MCAssembler &Asm,
+                            const MCAsmLayout &Layout,
+                            const MCFragment *Fragment,
+                            const MCFixup &Fixup, MCValue Target,
+                            uint64_t &FixedValue);
+
+  void RecordX86Relocation(MachObjectWriter *Writer,
+                           const MCAssembler &Asm,
+                           const MCAsmLayout &Layout,
+                           const MCFragment *Fragment,
+                           const MCFixup &Fixup, MCValue Target,
+                           uint64_t &FixedValue);
+
+  void RecordX86_64Relocation(MachObjectWriter *Writer,
+                              const MCAssembler &Asm,
+                              const MCAsmLayout &Layout,
+                              const MCFragment *Fragment,
+                              const MCFixup &Fixup, MCValue Target,
+                              uint64_t &FixedValue);
 public:
   X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
                       uint32_t CPUSubtype)
     : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
                                /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+  void RecordRelocation(MachObjectWriter *Writer,
+                        const MCAssembler &Asm, const MCAsmLayout &Layout,
+                        const MCFragment *Fragment, const MCFixup &Fixup,
+                        MCValue Target, uint64_t &FixedValue) {
+    // Non-64-bit writers take the X86 path; 64-bit writers the X86_64 one.
+    if (!Writer->is64Bit())
+      return RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup,
+                                 Target, FixedValue);
+    RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+                           FixedValue);
+  }
 };
 }
 
+static bool isFixupKindRIPRel(unsigned Kind) { // Is Kind a RIP-relative fixup?
+  const bool IsPlainRIPRel = Kind == X86::reloc_riprel_4byte;
+  return IsPlainRIPRel || Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+  // Translate a fixup kind into the log2 value used for its size field.
+  switch (Kind) {
+  case FK_Data_1: case FK_PCRel_1: return 0;
+  case FK_Data_2: case FK_PCRel_2: return 1;
+  case FK_Data_8:                  return 3;
+  case FK_Data_4: case FK_PCRel_4:
+    // FIXME: Remove these!!!
+  case X86::reloc_riprel_4byte:
+  case X86::reloc_riprel_4byte_movq_load:
+  case X86::reloc_signed_4byte:
+    return 2;
+  default:
+    break;
+  }
+  llvm_unreachable("invalid fixup kind!");
+}
+
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+                                                 const MCAssembler &Asm,
+                                                 const MCAsmLayout &Layout,
+                                                 const MCFragment *Fragment,
+                                                 const MCFixup &Fixup,
+                                                 MCValue Target,
+                                                 uint64_t &FixedValue) {
+  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+  // See <reloc.h>.
+  uint32_t FixupOffset =
+    Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+  uint32_t FixupAddress =
+    Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+  int64_t Value = 0;
+  unsigned Index = 0;
+  unsigned IsExtern = 0;
+  unsigned Type = 0;
+
+  Value = Target.getConstant();
+
+  if (IsPCRel) {
+    // Compensate for the relocation offset, Darwin x86_64 relocations only have
+    // the addend and appear to have attempted to define it to be the actual
+    // expression addend without the PCrel bias. However, instructions with data
+    // following the relocation are not accommodated for (see comment below
+    // regarding SIGNED{1,2,4}), so it isn't exactly that either.
+    Value += 1LL << Log2Size;
+  }
+
+  if (Target.isAbsolute()) { // constant
+    // SymbolNum of 0 indicates the absolute section.
+    Type = macho::RIT_X86_64_Unsigned;
+    Index = 0;
+
+    // FIXME: I believe this is broken, I don't think the linker can understand
+    // it. I think it would require a local relocation, but I'm not sure if that
+    // would work either. The official way to get an absolute PCrel relocation
+    // is to use an absolute symbol (which we don't support yet).
+    if (IsPCRel) {
+      IsExtern = 1;
+      Type = macho::RIT_X86_64_Branch;
+    }
+  } else if (Target.getSymB()) { // A - B + constant
+    const MCSymbol *A = &Target.getSymA()->getSymbol();
+    MCSymbolData &A_SD = Asm.getSymbolData(*A);
+    const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+    const MCSymbol *B = &Target.getSymB()->getSymbol();
+    MCSymbolData &B_SD = Asm.getSymbolData(*B);
+    const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+    // Neither symbol can be modified.
+    if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+        Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+      report_fatal_error("unsupported relocation of modified symbol");
+
+    // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+    // implement most of these correctly.
+    if (IsPCRel)
+      report_fatal_error("unsupported pc-relative relocation of difference");
+
+    // The support for the situation where one or both of the symbols would
+    // require a local relocation is handled just like if the symbols were
+    // external.  This is certainly used in the case of debug sections where the
+    // section has only temporary symbols and thus the symbols don't have base
+    // symbols.  This is encoded using the section ordinal and non-extern
+    // relocation entries.
+
+    // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+    // single SIGNED relocation); reject it for now.  Except the case where both
+    // symbols don't have a base, equal but both NULL.
+    if (A_Base == B_Base && A_Base)
+      report_fatal_error("unsupported relocation with identical base");
+
+    Value += Writer->getSymbolAddress(&A_SD, Layout) -
+      (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+    Value -= Writer->getSymbolAddress(&B_SD, Layout) -
+      (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+
+    if (A_Base) {
+      Index = A_Base->getIndex();
+      IsExtern = 1;
+    }
+    else {
+      Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
+    }
+    Type = macho::RIT_X86_64_Unsigned;
+
+    macho::RelocationEntry MRE;
+    MRE.Word0 = FixupOffset;
+    MRE.Word1 = ((Index     <<  0) |
+                 (IsPCRel   << 24) |
+                 (Log2Size  << 25) |
+                 (IsExtern  << 27) |
+                 (Type      << 28));
+    Writer->addRelocation(Fragment->getParent(), MRE);
+
+    if (B_Base) {
+      Index = B_Base->getIndex();
+      IsExtern = 1;
+    }
+    else {
+      Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
+    }
+    Type = macho::RIT_X86_64_Subtractor;
+  } else {
+    const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+    MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+    const MCSymbolData *Base = Asm.getAtom(&SD);
+
+    // Relocations inside debug sections always use local relocations when
+    // possible. This seems to be done because the debugger doesn't fully
+    // understand x86_64 relocation entries, and expects to find values that
+    // have already been fixed up.
+    if (Symbol->isInSection()) {
+      const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+        Fragment->getParent()->getSection());
+      if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+        Base = 0;
+    }
+
+    // x86_64 almost always uses external relocations, except when there is no
+    // symbol to use as a base address (a local symbol with no preceding
+    // non-local symbol).
+    if (Base) {
+      Index = Base->getIndex();
+      IsExtern = 1;
+
+      // Add the local offset, if needed.
+      if (Base != &SD)
+        Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+    } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+      // The index is the section ordinal (1-based).
+      Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+      IsExtern = 0;
+      Value += Writer->getSymbolAddress(&SD, Layout);
+
+      if (IsPCRel)
+        Value -= FixupAddress + (1 << Log2Size);
+    } else if (Symbol->isVariable()) {
+      const MCExpr *Value = Symbol->getVariableValue();
+      int64_t Res;
+      bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+                                             Writer->getSectionAddressMap());
+      if (isAbs) {
+        FixedValue = Res;
+        return;
+      } else {
+        report_fatal_error("unsupported relocation of variable '" +
+                           Symbol->getName() + "'");
+      }
+    } else {
+      report_fatal_error("unsupported relocation of undefined symbol '" +
+                         Symbol->getName() + "'");
+    }
+
+    MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+    if (IsPCRel) {
+      if (IsRIPRel) {
+        if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+          // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+          // rewrite the movq to an leaq at link time if the symbol ends up in
+          // the same linkage unit.
+          if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+            Type = macho::RIT_X86_64_GOTLoad;
+          else
+            Type = macho::RIT_X86_64_GOT;
+        }  else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+          Type = macho::RIT_X86_64_TLV;
+        }  else if (Modifier != MCSymbolRefExpr::VK_None) {
+          report_fatal_error("unsupported symbol modifier in relocation");
+        } else {
+          Type = macho::RIT_X86_64_Signed;
+
+          // The Darwin x86_64 relocation format has a problem where it cannot
+          // encode an address (L<foo> + <constant>) which is outside the atom
+          // containing L<foo>. Generally, this shouldn't occur but it does
+          // happen when we have a RIPrel instruction with data following the
+          // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+          // adjustment Darwin x86_64 uses, the offset is still negative and the
+          // linker has no way to recognize this.
+          //
+          // To work around this, Darwin uses several special relocation types
+          // to indicate the offsets. However, the specification or
+          // implementation of these seems to also be incomplete; they should
+          // adjust the addend as well based on the actual encoded instruction
+          // (the additional bias), but instead appear to just look at the final
+          // offset.
+          switch (-(Target.getConstant() + (1LL << Log2Size))) {
+          case 1: Type = macho::RIT_X86_64_Signed1; break;
+          case 2: Type = macho::RIT_X86_64_Signed2; break;
+          case 4: Type = macho::RIT_X86_64_Signed4; break;
+          }
+        }
+      } else {
+        if (Modifier != MCSymbolRefExpr::VK_None)
+          report_fatal_error("unsupported symbol modifier in branch "
+                             "relocation");
+
+        Type = macho::RIT_X86_64_Branch;
+      }
+    } else {
+      if (Modifier == MCSymbolRefExpr::VK_GOT) {
+        Type = macho::RIT_X86_64_GOT;
+      } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+        // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+        // case all we do is set the PCrel bit in the relocation entry; this is
+        // used with exception handling, for example. The source is required to
+        // include any necessary offset directly.
+        Type = macho::RIT_X86_64_GOT;
+        IsPCRel = 1;
+      } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+        report_fatal_error("TLVP symbol modifier should have been rip-rel");
+      } else if (Modifier != MCSymbolRefExpr::VK_None)
+        report_fatal_error("unsupported symbol modifier in relocation");
+      else
+        Type = macho::RIT_X86_64_Unsigned;
+    }
+  }
+
+  // x86_64 always writes custom values into the fixups.
+  FixedValue = Value;
+
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = FixupOffset;
+  MRE.Word1 = ((Index     <<  0) |
+               (IsPCRel   << 24) |
+               (Log2Size  << 25) |
+               (IsExtern  << 27) |
+               (Type      << 28));
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+                                                    const MCAssembler &Asm,
+                                                    const MCAsmLayout &Layout,
+                                                    const MCFragment *Fragment,
+                                                    const MCFixup &Fixup,
+                                                    MCValue Target,
+                                                    unsigned Log2Size,
+                                                    uint64_t &FixedValue) {
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Type = macho::RIT_Vanilla;
+
+  // Scattered entries record symbol addresses, not indices; see <reloc.h>.
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+  if (!A_SD->getFragment())
+    report_fatal_error("symbol '" + A->getName() +
+                       "' can not be undefined in a subtraction expression");
+
+  uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+  uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+  FixedValue += SecAddr;
+  uint32_t Value2 = 0;
+
+  if (const MCSymbolRefExpr *B = Target.getSymB()) {
+    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+    if (!B_SD->getFragment())
+      report_fatal_error("symbol '" + B->getSymbol().getName() +
+                         "' can not be undefined in a subtraction expression");
+
+    // Select the appropriate difference relocation type.
+    //
+    // Note that there is no longer any semantic difference between these two
+    // relocation types from the linker's point of view; the choice is made
+    // solely for pedantic compatibility with 'as'.
+    Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+      (unsigned)macho::RIT_Generic_LocalDifference;
+    Value2 = Writer->getSymbolAddress(B_SD, Layout);
+    FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+  }
+
+  // Relocations are written out in reverse order, so the PAIR comes first.
+  if (Type == macho::RIT_Difference ||
+      Type == macho::RIT_Generic_LocalDifference) {
+    macho::RelocationEntry MRE;
+    MRE.Word0 = ((0         <<  0) |
+                 (macho::RIT_Pair  << 24) |
+                 (Log2Size  << 28) |
+                 (IsPCRel   << 30) |
+                 macho::RF_Scattered);
+    MRE.Word1 = Value2;
+    Writer->addRelocation(Fragment->getParent(), MRE);
+  }
+
+  macho::RelocationEntry MRE;
+  MRE.Word0 = ((FixupOffset <<  0) |
+               (Type        << 24) |
+               (Log2Size    << 28) |
+               (IsPCRel     << 30) |
+               macho::RF_Scattered);
+  MRE.Word1 = Value;
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+                                               const MCAssembler &Asm,
+                                               const MCAsmLayout &Layout,
+                                               const MCFragment *Fragment,
+                                               const MCFixup &Fixup,
+                                               MCValue Target,
+                                               uint64_t &FixedValue) {
+  assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+         !is64Bit() &&
+         "Should only be called with a 32-bit TLVP relocation!");
+
+  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+  uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned IsPCRel = 0;
+
+  // Get the symbol data; the entry is always extern and indexed by A.
+  MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+  unsigned Index = SD_A->getIndex();
+
+  // We're only going to have a second symbol in pic mode and it'll be a
+  // subtraction from the picbase. For 32-bit pic the addend is the difference
+  // between the picbase and the next address.  For 32-bit static the addend is
+  // zero.
+  if (Target.getSymB()) {
+    // A subtraction means we're pcrel, measured from the end of the fixup.
+    uint32_t FixupAddress =
+      Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+    MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+    IsPCRel = 1;
+    FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
+                  Target.getConstant());
+    FixedValue += 1ULL << Log2Size;
+  } else {
+    FixedValue = 0;
+  }
+
+  // struct relocation_info (8 bytes): fixup offset, then the packed fields.
+  macho::RelocationEntry MRE;
+  MRE.Word0 = Value;
+  MRE.Word1 = ((Index                  <<  0) |
+               (IsPCRel                << 24) |
+               (Log2Size               << 25) |
+               (1                      << 27) | // Extern
+               (macho::RIT_Generic_TLV << 28)); // Type
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
+                                              const MCAssembler &Asm,
+                                              const MCAsmLayout &Layout,
+                                              const MCFragment *Fragment,
+                                              const MCFixup &Fixup,
+                                              MCValue Target,
+                                              uint64_t &FixedValue) {
+  unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+  unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+  // If this is a 32-bit TLVP reloc it's handled a bit differently.
+  if (Target.getSymA() &&
+      Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+    RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+                         FixedValue);
+    return;
+  }
+
+  // If this is a difference or a defined symbol plus an offset, then we need a
+  // scattered relocation entry. Differences always require scattered
+  // relocations.
+  if (Target.getSymB())
+    return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+                                     Target, Log2Size, FixedValue);
+
+  // Get the symbol data, if any.
+  MCSymbolData *SD = 0;
+  if (Target.getSymA())
+    SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+  // If this is an internal relocation with an offset, it also needs a scattered
+  // relocation entry.
+  uint32_t Offset = Target.getConstant();
+  if (IsPCRel)
+    Offset += 1 << Log2Size;
+  if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+    return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+                                     Target, Log2Size, FixedValue);
+
+  // Otherwise emit a plain (non-scattered) entry; see <reloc.h>.
+  uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+  unsigned Index = 0;
+  unsigned IsExtern = 0;
+  unsigned Type = 0;
+
+  if (Target.isAbsolute()) { // constant
+    // SymbolNum of 0 indicates the absolute section.
+    //
+    // FIXME: Currently, these are never generated (see code below). I cannot
+    // find a case where they are actually emitted.
+    Type = macho::RIT_Vanilla;
+  } else {
+    // Resolve constant variables.
+    if (SD->getSymbol().isVariable()) {
+      int64_t Res;
+      if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+            Res, Layout, Writer->getSectionAddressMap())) {
+        FixedValue = Res;
+        return;
+      }
+    }
+
+    // Check whether we need an external or internal relocation.
+    if (Writer->doesSymbolRequireExternRelocation(SD)) {
+      IsExtern = 1;
+      Index = SD->getIndex();
+      // For external relocations, make sure to offset the fixup value to
+      // compensate for the addend of the symbol address, if the symbol is
+      // defined. This occurs with weak definitions, for example.
+      if (!SD->Symbol->isUndefined())
+        FixedValue -= Layout.getSymbolOffset(SD);
+    } else {
+      // The index is the section ordinal (1-based).
+      const MCSectionData &SymSD = Asm.getSectionData(
+        SD->getSymbol().getSection());
+      Index = SymSD.getOrdinal() + 1;
+      FixedValue += Writer->getSectionAddress(&SymSD);
+    }
+    if (IsPCRel)
+      FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+    Type = macho::RIT_Vanilla;
+  }
+
+  // struct relocation_info (8 bytes)
+  macho::RelocationEntry MRE;
+  MRE.Word0 = FixupOffset;
+  MRE.Word1 = ((Index     <<  0) |
+               (IsPCRel   << 24) |
+               (Log2Size  << 25) |
+               (IsExtern  << 27) |
+               (Type      << 28));
+  Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
 MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
                                                 bool Is64Bit,
                                                 uint32_t CPUType,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1ad6203af2f6..f2faf59367a1 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -39,6 +39,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
 using namespace llvm;
 
 cl::opt<bool>
@@ -49,18 +53,11 @@ ForceStackAlign("force-align-stack",
 
 X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                  const TargetInstrInfo &tii)
-  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
-                         X86::ADJCALLSTACKDOWN64 :
-                         X86::ADJCALLSTACKDOWN32,
-                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
-                         X86::ADJCALLSTACKUP64 :
-                         X86::ADJCALLSTACKUP32),
-    TM(tm), TII(tii) {
+  : X86GenRegisterInfo(), TM(tm), TII(tii) {
   // Cache some information.
   const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
   Is64Bit = Subtarget->is64Bit();
   IsWin64 = Subtarget->isTargetWin64();
-  StackAlign = TM.getFrameLowering()->getStackAlignment();
 
   if (Is64Bit) {
     SlotSize = 8;
@@ -107,6 +104,21 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
   return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
 }
 
+/// getCompactUnwindRegNum - Map DWARF register number RegNum to its compact
+/// unwind encoding number (1-6). Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+  switch (getLLVMRegNum(RegNum, isEH)) {
+  case X86::EBX: case X86::RBX: return 1;
+  case X86::ECX: case X86::R12: return 2;
+  case X86::EDX: case X86::R13: return 3;
+  case X86::EDI: case X86::R14: return 4;
+  case X86::ESI: case X86::R15: return 5;
+  case X86::EBP: case X86::RBP: return 6;
+  }
+
+  return -1;
+}
+
 int
 X86RegisterInfo::getSEHRegNum(unsigned i) const {
   int reg = getX86RegNum(i);
@@ -495,18 +507,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     Reserved.set(X86::BPL);
   }
 
-  // Mark the x87 stack registers as reserved, since they don't behave normally
-  // with respect to liveness. We don't fully model the effects of x87 stack
-  // pushes and pops after stackification.
-  Reserved.set(X86::ST0);
-  Reserved.set(X86::ST1);
-  Reserved.set(X86::ST2);
-  Reserved.set(X86::ST3);
-  Reserved.set(X86::ST4);
-  Reserved.set(X86::ST5);
-  Reserved.set(X86::ST6);
-  Reserved.set(X86::ST7);
-
   // Mark the segment registers as reserved.
   Reserved.set(X86::CS);
   Reserved.set(X86::SS);
@@ -517,13 +517,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
   // Reserve the registers that only exist in 64-bit mode.
   if (!Is64Bit) {
+    // These 8-bit registers are part of the x86-64 extension even though their
+    // super-registers are the old 32-bit registers.
+    Reserved.set(X86::SIL);
+    Reserved.set(X86::DIL);
+    Reserved.set(X86::BPL);
+    Reserved.set(X86::SPL);
+
     for (unsigned n = 0; n != 8; ++n) {
+      // R8, R9, ...
       const unsigned GPR64[] = {
         X86::R8,  X86::R9,  X86::R10, X86::R11,
         X86::R12, X86::R13, X86::R14, X86::R15
       };
-      for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
-           ++AI)
+      for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
         Reserved.set(Reg);
 
       // XMM8, XMM9, ...
@@ -550,6 +557,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
 bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const Function *F = MF.getFunction();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                                F->hasFnAttr(Attribute::StackAlignment));
 
@@ -608,7 +616,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
   int Opcode = I->getOpcode();
-  bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
   DebugLoc DL = I->getDebugLoc();
   uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
   uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
@@ -625,16 +633,17 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
     // alignment boundary.
+    unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
     Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
 
     MachineInstr *New = 0;
-    if (Opcode == getCallFrameSetupOpcode()) {
+    if (Opcode == TII.getCallFrameSetupOpcode()) {
       New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
                     StackPtr)
         .addReg(StackPtr)
         .addImm(Amount);
     } else {
-      assert(Opcode == getCallFrameDestroyOpcode());
+      assert(Opcode == TII.getCallFrameDestroyOpcode());
 
       // Factor out the amount the callee already popped.
       Amount -= CalleeAmt;
@@ -657,7 +666,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     return;
   }
 
-  if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+  if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
     // If we are performing frame pointer elimination and if the callee pops
     // something off the stack pointer, add it back.  We do this until we have
     // more advanced stack pointer tracking ability.
@@ -667,6 +676,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
     // The EFLAGS implicit def is dead.
     New->getOperand(3).setIsDead();
+
+    // We are not tracking the stack pointer adjustment by the callee, so make
+    // sure we restore the stack pointer immediately after the call; there may
+    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+    MachineBasicBlock::iterator B = MBB.begin();
+    while (I != B && !llvm::prior(I)->getDesc().isCall())
+      --I;
     MBB.insert(I, New);
   }
 }
@@ -713,7 +729,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   if (MI.getOperand(i+3).isImm()) {
     // Offset is a 32-bit integer.
-    int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
+    int Imm = (int)(MI.getOperand(i + 3).getImm());
+    int Offset = FIOffset + Imm;
+    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+           "Requesting 64-bit offset in 32-bit immediate!");
     MI.getOperand(i + 3).ChangeToImmediate(Offset);
   } else {
     // Offset is symbolic. This is extremely rare.
@@ -910,8 +929,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
 }
 }
 
-#include "X86GenRegisterInfo.inc"
-
 namespace {
   struct MSAH : public MachineFunctionPass {
     static char ID;
@@ -920,10 +937,10 @@ namespace {
     virtual bool runOnMachineFunction(MachineFunction &MF) {
       const X86TargetMachine *TM =
         static_cast<const X86TargetMachine *>(&MF.getTarget());
-      const X86RegisterInfo *X86RI = TM->getRegisterInfo();
+      const TargetFrameLowering *TFI = TM->getFrameLowering();
       MachineRegisterInfo &RI = MF.getRegInfo();
       X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-      unsigned StackAlignment = X86RI->getStackAlignment();
+      unsigned StackAlignment = TFI->getStackAlignment();
 
       // Be over-conservative: scan over all vreg defs and find whether vector
       // registers are used. If yes, there is a possibility that vector register
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index dd3d3dcdcce5..a12eb1297f7e 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -15,7 +15,9 @@
 #define X86REGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "X86GenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"
 
 namespace llvm {
   class Type;
@@ -56,10 +58,6 @@ private:
   ///
   unsigned SlotSize;
 
-  /// StackAlign - Default stack alignment.
-  ///
-  unsigned StackAlign;
-
   /// StackPtr - X86 physical register used as stack ptr.
   ///
   unsigned StackPtr;
@@ -75,8 +73,6 @@ public:
   /// register identifier.
   static unsigned getX86RegNum(unsigned RegNo);
 
-  unsigned getStackAlignment() const { return StackAlign; }
-
   /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
   /// (created by TableGen) for target dependencies.
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
@@ -85,6 +81,10 @@ public:
   // FIXME: This should be tablegen'd like getDwarfRegNum is
   int getSEHRegNum(unsigned i) const;
 
+  /// getCompactUnwindRegNum - Map DWARF register number RegNum to its compact
+  /// unwind encoding number (1-6). Return -1 if the register isn't valid.
+  int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
   /// Code Generation virtual methods...
   /// 
 
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index f1d149c3fbc7..203722a66162 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -206,15 +206,22 @@ let Namespace = "X86" in {
   def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
   }
 
-  // Floating point stack registers
-  def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
-  def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
-  def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>;
-  def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>;
-  def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
-  def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
-  def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
-  def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+  class STRegister<string Name, list<Register> A> : Register<Name> {
+    let Aliases = A;
+  }
+
+  // Floating point stack registers. These don't map one-to-one to the FP
+  // pseudo registers, but we still mark them as aliasing FP registers. That
+  // way both kinds can be live without exceeding the stack depth. ST registers
+  // are only live around inline assembly.
+  def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
+  def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
+  def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
+  def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
+  def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
+  def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
+  def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
+  def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
 
   // Status flags register
   def EFLAGS : Register<"flags">;
@@ -279,58 +286,23 @@ let Namespace = "X86" in {
 // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
 // cannot be encoded.
 def GR8 : RegisterClass<"X86", [i8],  8,
-                        [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
-                         R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    static const unsigned X86_GR8_AO_64[] = {
-      X86::AL,   X86::CL,   X86::DL,   X86::SIL, X86::DIL,
-      X86::R8B,  X86::R9B,  X86::R10B, X86::R11B,
-      X86::BL,   X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
-    };
-
-    GR8Class::iterator
-    GR8Class::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return X86_GR8_AO_64;
-      else
-        return begin();
-    }
-
-    GR8Class::iterator
-    GR8Class::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const TargetFrameLowering *TFI = TM.getFrameLowering();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
-      // Does the function dedicate RBP / EBP to being a frame ptr?
-      if (!Subtarget.is64Bit())
-        // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
-        return begin() + 8;
-      else if (TFI->hasFP(MF) || MFI->getReserveFP())
-        // If so, don't allocate SPL or BPL.
-        return array_endof(X86_GR8_AO_64) - 1;
-      else
-        // If not, just don't allocate SPL.
-        return array_endof(X86_GR8_AO_64);
-    }
+                        (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+                             R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+  let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+  let AltOrderSelect = [{
+    return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
   }];
 }
 
 def GR16 : RegisterClass<"X86", [i16], 16,
-                         [AX, CX, DX, SI, DI, BX, BP, SP,
-                          R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
+                         (add AX, CX, DX, SI, DI, BX, BP, SP,
+                              R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
 }
 
 def GR32 : RegisterClass<"X86", [i32], 32,
-                         [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
-                          R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+                         (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+                              R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
 }
 
@@ -338,8 +310,8 @@ def GR32 : RegisterClass<"X86", [i32], 32,
 // RIP isn't really a register and it can't be used anywhere except in an
 // address, but it doesn't cause trouble.
 def GR64 : RegisterClass<"X86", [i64], 64,
-                         [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                          RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
+                         (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+                              RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
                        (GR16 sub_16bit),
                        (GR32 sub_32bit)];
@@ -348,16 +320,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
 // Segment registers for use by MOV instructions (and others) that have a
 //   segment register as one operand.  Always contain a 16-bit segment
 //   descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
 
 // Debug registers.
-def DEBUG_REG : RegisterClass<"X86", [i32], 32,
-                              [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
 
 // Control registers.
-def CONTROL_REG : RegisterClass<"X86", [i64], 64,
-                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
-                                 CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
 
 // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
 // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -365,99 +334,69 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
 // that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
 // and GR64_ABCD are classes for registers that support 8-bit h-register
 // operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
-def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
   let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
 }
-def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
   let SubRegClasses = [(GR8_ABCD_L sub_8bit),
                        (GR8_ABCD_H sub_8bit_hi),
                        (GR16_ABCD sub_16bit)];
 }
-def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
   let SubRegClasses = [(GR8_ABCD_L sub_8bit),
                        (GR8_ABCD_H sub_8bit_hi),
                        (GR16_ABCD sub_16bit),
                        (GR32_ABCD sub_32bit)];
 }
-def GR32_TC   : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+def GR32_TC   : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
 }
-def GR64_TC   : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
-                                                 R8, R9, R11, RIP]> {
+def GR64_TC   : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+                                                     R8, R9, R11, RIP)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
                        (GR16 sub_16bit),
                        (GR32_TC sub_32bit)];
 }
 
-def GR64_TCW64   : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
-                                                    R8, R9, R11]>;
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+                                                      R8, R9, R11)>;
 
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
-                              [AL, CL, DL, AH, CH, DH, BL, BH]> {
-  let MethodProtos = [{
-    iterator allocation_order_begin(const MachineFunction &MF) const;
-    iterator allocation_order_end(const MachineFunction &MF) const;
-  }];
-  let MethodBodies = [{
-    // In 64-bit mode, it's not safe to blindly allocate H registers.
-    static const unsigned X86_GR8_NOREX_AO_64[] = {
-      X86::AL, X86::CL, X86::DL, X86::BL
-    };
-
-    GR8_NOREXClass::iterator
-    GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return X86_GR8_NOREX_AO_64;
-      else
-        return begin();
-    }
-
-    GR8_NOREXClass::iterator
-    GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
-      const TargetMachine &TM = MF.getTarget();
-      const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
-      if (Subtarget.is64Bit())
-        return array_endof(X86_GR8_NOREX_AO_64);
-      else
-        return end();
-    }
+                              (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+  let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+  let AltOrderSelect = [{
+    return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
   }];
 }
 // GR16_NOREX - GR16 registers which do not require a REX prefix.
 def GR16_NOREX : RegisterClass<"X86", [i16], 16,
-                               [AX, CX, DX, SI, DI, BX, BP, SP]> {
+                               (add AX, CX, DX, SI, DI, BX, BP, SP)> {
   let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
 }
 // GR32_NOREX - GR32 registers which do not require a REX prefix.
 def GR32_NOREX : RegisterClass<"X86", [i32], 32,
-                               [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+                               (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
   let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                        (GR16_NOREX sub_16bit)];
 }
 // GR64_NOREX - GR64 registers which do not require a REX prefix.
 def GR64_NOREX : RegisterClass<"X86", [i64], 64,
-                               [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
+                            (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
   let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                        (GR16_NOREX sub_16bit),
                        (GR32_NOREX sub_32bit)];
 }
 
 // GR32_NOSP - GR32 registers except ESP.
-def GR32_NOSP : RegisterClass<"X86", [i32], 32,
-                              [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
-                               R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
 }
 
 // GR64_NOSP - GR64 registers except RSP (and RIP).
-def GR64_NOSP : RegisterClass<"X86", [i64], 64,
-                              [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                               RBX, R14, R15, R12, R13, RBP]> {
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
   let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
                        (GR16 sub_16bit),
                        (GR32_NOSP sub_32bit)];
@@ -466,36 +405,30 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
 // GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
 // ESP.
 def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
-                               [EAX, ECX, EDX, ESI, EDI, EBX, EBP]> {
+                                    (and GR32_NOREX, GR32_NOSP)> {
   let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                        (GR16_NOREX sub_16bit)];
 }
 
 // GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
 def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
-                                    [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
+                                    (and GR64_NOREX, GR64_NOSP)> {
   let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
                        (GR16_NOREX sub_16bit),
                        (GR32_NOREX_NOSP sub_32bit)];
 }
 
 // A class to support the 'A' assembler constraint: EAX then EDX.
-def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
   let SubRegClasses = [(GR8_ABCD_L sub_8bit),
                        (GR8_ABCD_H sub_8bit_hi),
                        (GR16_ABCD sub_16bit)];
 }
 
 // Scalar SSE2 floating point registers.
-def FR32 : RegisterClass<"X86", [f32], 32,
-                         [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                          XMM8, XMM9, XMM10, XMM11,
-                          XMM12, XMM13, XMM14, XMM15]>;
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
 
-def FR64 : RegisterClass<"X86", [f64], 64,
-                         [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                          XMM8, XMM9, XMM10, XMM11,
-                          XMM12, XMM13, XMM14, XMM15]>;
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
 
 
 // FIXME: This sets up the floating point register files as though they are f64
@@ -504,37 +437,31 @@ def FR64 : RegisterClass<"X86", [f64], 64,
 // faster on common hardware.  In reality, this should be controlled by a
 // command line option or something.
 
-def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
 
 // Floating point stack registers (these are not allocatable by the
 // register allocator - the floating point stackifier is responsible
 // for transforming FPn allocations to STn registers)
-def RST : RegisterClass<"X86", [f80, f64, f32], 32,
-                        [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
   let isAllocatable = 0;
 }
 
 // Generic vector registers: VR64 and VR128.
-def VR64: RegisterClass<"X86", [x86mmx], 64,
-                          [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
-def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
-                          [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-                           XMM8, XMM9, XMM10, XMM11,
-                           XMM12, XMM13, XMM14, XMM15]> {
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                          128, (add FR32)> {
   let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
 }
 
 def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
-                          [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
-                           YMM8, YMM9, YMM10, YMM11,
-                           YMM12, YMM13, YMM14, YMM15]> {
+                          (sequence "YMM%u", 0, 15)> {
   let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
 }
 
 // Status flags registers.
-def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
   let CopyCost = -1;  // Don't allow copying of status registers.
   let isAllocatable = 0;
 }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 481e821030b3..5e6c659e5393 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -7,21 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the X86 specific subclass of TargetSubtarget.
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "subtarget"
 #include "X86Subtarget.h"
 #include "X86InstrInfo.h"
-#include "X86GenSubtarget.inc"
 #include "llvm/GlobalValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
 using namespace llvm;
 
 #if defined(_MSC_VER)
@@ -154,7 +157,7 @@ const char *X86Subtarget::getBZeroEntry() const {
 /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
 /// to immediate address.
 bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
-  if (Is64Bit)
+  if (In64BitMode)
     return false;
   return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
 }
@@ -170,73 +173,6 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
   return 200;
 }
 
-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments.  If we can't run cpuid on the host, return true.
-static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
-                            unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
-  #if defined(__GNUC__)
-    // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
-    asm ("movq\t%%rbx, %%rsi\n\t"
-         "cpuid\n\t"
-         "xchgq\t%%rbx, %%rsi\n\t"
-         : "=a" (*rEAX),
-           "=S" (*rEBX),
-           "=c" (*rECX),
-           "=d" (*rEDX)
-         :  "a" (value));
-    return false;
-  #elif defined(_MSC_VER)
-    int registers[4];
-    __cpuid(registers, value);
-    *rEAX = registers[0];
-    *rEBX = registers[1];
-    *rECX = registers[2];
-    *rEDX = registers[3];
-    return false;
-  #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
-  #if defined(__GNUC__)
-    asm ("movl\t%%ebx, %%esi\n\t"
-         "cpuid\n\t"
-         "xchgl\t%%ebx, %%esi\n\t"
-         : "=a" (*rEAX),
-           "=S" (*rEBX),
-           "=c" (*rECX),
-           "=d" (*rEDX)
-         :  "a" (value));
-    return false;
-  #elif defined(_MSC_VER)
-    __asm {
-      mov   eax,value
-      cpuid
-      mov   esi,rEAX
-      mov   dword ptr [esi],eax
-      mov   esi,rEBX
-      mov   dword ptr [esi],ebx
-      mov   esi,rECX
-      mov   dword ptr [esi],ecx
-      mov   esi,rEDX
-      mov   dword ptr [esi],edx
-    }
-    return false;
-  #endif
-#endif
-  return true;
-}
-
-static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
-  Family = (EAX >> 8) & 0xf; // Bits 8 - 11
-  Model  = (EAX >> 4) & 0xf; // Bits 4 - 7
-  if (Family == 6 || Family == 0xf) {
-    if (Family == 0xf)
-      // Examine extended family ID if family ID is F.
-      Family += (EAX >> 20) & 0xff;    // Bits 20 - 27
-    // Examine extended model ID if family ID is 6 or F.
-    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
-  }
-}
-
 void X86Subtarget::AutoDetectSubtargetFeatures() {
   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   union {
@@ -244,50 +180,66 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
     char     c[12];
   } text;
   
-  if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+  if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
     return;
 
-  GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+  X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
   
-  if ((EDX >> 15) & 1) HasCMov = true;
-  if ((EDX >> 23) & 1) X86SSELevel = MMX;
-  if ((EDX >> 25) & 1) X86SSELevel = SSE1;
-  if ((EDX >> 26) & 1) X86SSELevel = SSE2;
-  if (ECX & 0x1)       X86SSELevel = SSE3;
-  if ((ECX >> 9)  & 1) X86SSELevel = SSSE3;
-  if ((ECX >> 19) & 1) X86SSELevel = SSE41;
-  if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+  if ((EDX >> 15) & 1) { HasCMov = true;      ToggleFeature(X86::FeatureCMOV); }
+  if ((EDX >> 23) & 1) { X86SSELevel = MMX;   ToggleFeature(X86::FeatureMMX); }
+  if ((EDX >> 25) & 1) { X86SSELevel = SSE1;  ToggleFeature(X86::FeatureSSE1); }
+  if ((EDX >> 26) & 1) { X86SSELevel = SSE2;  ToggleFeature(X86::FeatureSSE2); }
+  if (ECX & 0x1)       { X86SSELevel = SSE3;  ToggleFeature(X86::FeatureSSE3); }
+  if ((ECX >> 9)  & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3); }
+  if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41); }
+  if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42); }
   // FIXME: AVX codegen support is not ready.
-  //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
+  //if ((ECX >> 28) & 1) { HasAVX = true; } ToggleFeature(X86::FeatureAVX);
 
   bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
   bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
 
-  HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
-  HasFMA3  = IsIntel && ((ECX >> 12) & 0x1);
-  HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
-  HasAES   = IsIntel && ((ECX >> 25) & 0x1);
+  HasCLMUL  = IsIntel && ((ECX >> 1) & 0x1);  if (HasCLMUL)  ToggleFeature(X86::FeatureCLMUL);
+  HasFMA3   = IsIntel && ((ECX >> 12) & 0x1); if (HasFMA3)   ToggleFeature(X86::FeatureFMA3);
+  HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1); if (HasPOPCNT) ToggleFeature(X86::FeaturePOPCNT);
+  HasAES    = IsIntel && ((ECX >> 25) & 0x1); if (HasAES)    ToggleFeature(X86::FeatureAES);
 
   if (IsIntel || IsAMD) {
     // Determine if bit test memory instructions are slow.
     unsigned Family = 0;
     unsigned Model  = 0;
-    DetectFamilyModel(EAX, Family, Model);
-    IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+    X86_MC::DetectFamilyModel(EAX, Family, Model);
+    if (IsAMD || (Family == 6 && Model >= 13)) {
+      IsBTMemSlow = true;
+      ToggleFeature(X86::FeatureSlowBTMem);
+    }
     // If it's Nehalem, unaligned memory access is fast.
-    if (Family == 15 && Model == 26)
+    if (Family == 6 && Model == 26) { // Nehalem is family 6, model 0x1A.
+      IsUAMemFast = true;
+      ToggleFeature(X86::FeatureFastUAMem);
+    }
 
-    GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
-    HasX86_64 = (EDX >> 29) & 0x1;
-    HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
-    HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
+    X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+    if ((EDX >> 29) & 0x1) {
+      HasX86_64 = true;
+      ToggleFeature(X86::Feature64Bit);
+    }
+    if (IsAMD && ((ECX >> 6) & 0x1)) {
+      HasSSE4A = true;
+      ToggleFeature(X86::FeatureSSE4A);
+    }
+    if (IsAMD && ((ECX >> 16) & 0x1)) {
+      HasFMA4 = true;
+      ToggleFeature(X86::FeatureFMA4);
+    }
   }
 }
 
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, 
-                           bool is64Bit)
-  : PICStyle(PICStyles::None)
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS, 
+                           unsigned StackAlignOverride, bool is64Bit)
+  : X86GenSubtargetInfo(TT, CPU, FS)
+  , PICStyle(PICStyles::None)
   , X86SSELevel(NoMMXSSE)
   , X863DNowLevel(NoThreeDNow)
   , HasCMov(false)
@@ -306,73 +258,66 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
   // FIXME: this is a known good value for Yonah. How about others?
   , MaxInlineSizeThreshold(128)
   , TargetTriple(TT)
-  , Is64Bit(is64Bit) {
-
-  // default to hard float ABI
-  if (FloatABIType == FloatABI::Default)
-    FloatABIType = FloatABI::Hard;
-    
+  , In64BitMode(is64Bit) {
   // Determine default and user specified characteristics
-  if (!FS.empty()) {
+  if (!FS.empty() || !CPU.empty()) {
+    std::string CPUName = CPU;
+    // No CPU given: use the host's on x86 hosts (GCC or MSVC), else "generic".
+#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) || \
+    defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+    if (CPUName.empty()) CPUName = sys::getHostCPUName();
+#else
+    if (CPUName.empty()) CPUName = "generic";
+#endif
+
+    // Make sure 64-bit features are available in 64-bit mode. (But make sure
+    // SSE2 can be turned off explicitly.)
+    std::string FullFS = FS;
+    if (In64BitMode) {
+      if (!FullFS.empty())
+        FullFS = "+64bit,+sse2," + FullFS;
+      else
+        FullFS = "+64bit,+sse2";
+    }
+
     // If feature string is not empty, parse features string.
-    std::string CPU = sys::getHostCPUName();
-    ParseSubtargetFeatures(FS, CPU);
-    // All X86-64 CPUs also have SSE2, however user might request no SSE via 
-    // -mattr, so don't force SSELevel here.
-    if (HasAVX)
-      X86SSELevel = NoMMXSSE;
+    ParseSubtargetFeatures(CPUName, FullFS);
   } else {
     // Otherwise, use CPUID to auto-detect feature set.
     AutoDetectSubtargetFeatures();
-    // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
-    if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
-      X86SSELevel = SSE2;
-  }
 
-  // If requesting codegen for X86-64, make sure that 64-bit features
-  // are enabled.
-  if (Is64Bit) {
-    HasX86_64 = true;
+    // Make sure 64-bit features are available in 64-bit mode.
+    if (In64BitMode) {
+      if (!HasX86_64) { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); }
+      if (!HasCMov)   { HasCMov = true;   ToggleFeature(X86::FeatureCMOV); }
 
-    // All 64-bit cpus have cmov support.
-    HasCMov = true;
+      if (!HasAVX && X86SSELevel < SSE2) {
+        if (X86SSELevel < SSE1) ToggleFeature(X86::FeatureSSE1);
+        ToggleFeature(X86::FeatureSSE2);
+        X86SSELevel = SSE2;
+      }
+    }
   }
+
+  // It's important to keep the MCSubtargetInfo feature bits in sync with
+  // target data structure which is shared with MC code emitter, etc.
+  if (In64BitMode)
+    ToggleFeature(X86::Mode64Bit);
+
+  if (HasAVX)
+    X86SSELevel = NoMMXSSE;
     
   DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
                << ", 3DNowLevel " << X863DNowLevel
                << ", 64bit " << HasX86_64 << "\n");
-  assert((!Is64Bit || HasX86_64) &&
+  assert((!In64BitMode || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
   // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
   // 32 and 64 bit) and for all 64-bit targets.
-  if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
-      isTargetSolaris() || Is64Bit)
+  if (StackAlignOverride)
+    stackAlignment = StackAlignOverride;
+  else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+           isTargetSolaris() || In64BitMode)
     stackAlignment = 16;
-
-  if (StackAlignment)
-    stackAlignment = StackAlignment;
-}
-
-/// IsCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
-bool X86Subtarget::IsCalleePop(bool IsVarArg,
-                               CallingConv::ID CallingConv) const {
-  if (IsVarArg)
-    return false;
-
-  switch (CallingConv) {
-  default:
-    return false;
-  case CallingConv::X86_StdCall:
-    return !is64Bit();
-  case CallingConv::X86_FastCall:
-    return !is64Bit();
-  case CallingConv::X86_ThisCall:
-    return !is64Bit();
-  case CallingConv::Fast:
-    return GuaranteedTailCallOpt;
-  case CallingConv::GHC:
-    return GuaranteedTailCallOpt;
-  }
 }
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 286a7982a699..6d22027b7aa8 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the X86 specific subclass of TargetSubtarget.
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
@@ -15,12 +15,16 @@
 #define X86SUBTARGET_H
 
 #include "llvm/ADT/Triple.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/CallingConv.h"
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
 namespace llvm {
 class GlobalValue;
+class StringRef;
 class TargetMachine;
 
 /// PICStyles - The X86 backend supports a number of different styles of PIC.
@@ -35,7 +39,7 @@ enum Style {
 };
 }
 
-class X86Subtarget : public TargetSubtarget {
+class X86Subtarget : public X86GenSubtargetInfo {
 protected:
   enum X86SSEEnum {
     NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
@@ -108,16 +112,17 @@ protected:
   Triple TargetTriple;
 
 private:
-  /// Is64Bit - True if the processor supports 64-bit instructions and
-  /// pointer size is 64 bit.
-  bool Is64Bit;
+  /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+  bool In64BitMode;
 
 public:
 
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+  X86Subtarget(const std::string &TT, const std::string &CPU,
+               const std::string &FS,
+               unsigned StackAlignOverride, bool is64Bit);
 
   /// getStackAlignment - Returns the minimum alignment known to hold of the
   /// stack frame on entry to the function and which must be maintained by every
@@ -130,14 +135,13 @@ public:
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
   /// instruction.
   void AutoDetectSubtargetFeatures();
 
-  bool is64Bit() const { return Is64Bit; }
+  bool is64Bit() const { return In64BitMode; }
 
   PICStyles::Style getPICStyle() const { return PICStyle; }
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
@@ -195,7 +199,7 @@ public:
   }
 
   bool isTargetWin64() const {
-    return Is64Bit && (isTargetMingw() || isTargetWindows());
+    return In64BitMode && (isTargetMingw() || isTargetWindows());
   }
 
   bool isTargetEnvMacho() const {
@@ -203,7 +207,7 @@ public:
   }
 
   bool isTargetWin32() const {
-    return !Is64Bit && (isTargetMingw() || isTargetWindows());
+    return !In64BitMode && (isTargetMingw() || isTargetWindows());
   }
 
   bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
@@ -248,9 +252,6 @@ public:
   /// indicating the number of scheduling cycles of backscheduling that
   /// should be attempted.
   unsigned getSpecialAddressLatency() const;
-
-  /// IsCalleePop - Test whether a function should pop its own arguments.
-  bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 74833291dc7a..9cab0e089098 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86MCAsmInfo.h"
 #include "X86TargetMachine.h"
 #include "X86.h"
 #include "llvm/PassManager.h"
@@ -24,22 +23,6 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
-  Triple TheTriple(TT);
-
-  if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
-    if (TheTriple.getArch() == Triple::x86_64)
-      return new X86_64MCAsmInfoDarwin(TheTriple);
-    else
-      return new X86MCAsmInfoDarwin(TheTriple);
-  }
-
-  if (TheTriple.isOSWindows())
-    return new X86MCAsmInfoCOFF(TheTriple);
-
-  return new X86ELFMCAsmInfo(TheTriple);
-}
-
 static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
                                     MCContext &Ctx, TargetAsmBackend &TAB,
                                     raw_ostream &_OS,
@@ -62,15 +45,11 @@ extern "C" void LLVMInitializeX86Target() {
   RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
   RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
 
-  // Register the target asm info.
-  RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
-  RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
-
   // Register the code emitter.
   TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
-                                      createX86_32MCCodeEmitter);
+                                      createX86MCCodeEmitter);
   TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
-                                      createX86_64MCCodeEmitter);
+                                      createX86MCCodeEmitter);
 
   // Register the asm backend.
   TargetRegistry::RegisterAsmBackend(TheX86_32Target,
@@ -87,8 +66,9 @@ extern "C" void LLVMInitializeX86Target() {
 
 
 X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
+                                         const std::string &CPU,
                                          const std::string &FS)
-  : X86TargetMachine(T, TT, FS, false),
+  : X86TargetMachine(T, TT, CPU, FS, false),
     DataLayout(getSubtargetImpl()->isTargetDarwin() ?
                "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
                (getSubtargetImpl()->isTargetCygMing() ||
@@ -103,8 +83,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
 
 
 X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
+                                         const std::string &CPU, 
                                          const std::string &FS)
-  : X86TargetMachine(T, TT, FS, true),
+  : X86TargetMachine(T, TT, CPU, FS, true),
     DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
     InstrInfo(*this),
     TSInfo(*this),
@@ -115,9 +96,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
 /// X86TargetMachine ctor - Create an X86 target.
 ///
 X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+                                   const std::string &CPU,
                                    const std::string &FS, bool is64Bit)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS, is64Bit),
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
     FrameLowering(*this, Subtarget),
     ELFWriterInfo(is64Bit, true) {
   DefRelocModel = getRelocationModel();
@@ -182,6 +164,10 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
   // Finally, if we have "none" as our PIC style, force to static mode.
   if (Subtarget.getPICStyle() == PICStyles::None)
     setRelocationModel(Reloc::Static);
+
+  // Default to the hard-float ABI unless one was explicitly requested.
+  if (FloatABIType == FloatABI::Default)
+    FloatABIType = FloatABI::Hard;    
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 597392251e6a..885334a365fe 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -43,7 +43,8 @@ private:
   
 public:
   X86TargetMachine(const Target &T, const std::string &TT, 
-                   const std::string &FS, bool is64Bit);
+                   const std::string &CPU, const std::string &FS,
+                   bool is64Bit);
 
   virtual const X86InstrInfo     *getInstrInfo() const {
     llvm_unreachable("getInstrInfo not implemented");
@@ -87,7 +88,7 @@ class X86_32TargetMachine : public X86TargetMachine {
   X86JITInfo        JITInfo;
 public:
   X86_32TargetMachine(const Target &T, const std::string &M,
-                      const std::string &FS);
+                      const std::string &CPU, const std::string &FS);
   virtual const TargetData *getTargetData() const { return &DataLayout; }
   virtual const X86TargetLowering *getTargetLowering() const {
     return &TLInfo;
@@ -113,7 +114,7 @@ class X86_64TargetMachine : public X86TargetMachine {
   X86JITInfo        JITInfo;
 public:
   X86_64TargetMachine(const Target &T, const std::string &TT,
-                      const std::string &FS);
+                      const std::string &CPU, const std::string &FS);
   virtual const TargetData *getTargetData() const { return &DataLayout; }
   virtual const X86TargetLowering *getTargetLowering() const {
     return &TLInfo;
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 9093de691582..a1d73c6b4f99 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -1,14 +1,11 @@
 set(LLVM_TARGET_DEFINITIONS XCore.td)
 
-tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(XCoreGenRegisterNames.inc -gen-register-enums)
-tablegen(XCoreGenRegisterInfo.inc -gen-register-desc)
-tablegen(XCoreGenInstrNames.inc -gen-instr-enums)
-tablegen(XCoreGenInstrInfo.inc -gen-instr-desc)
+tablegen(XCoreGenRegisterInfo.inc -gen-register-info)
+tablegen(XCoreGenInstrInfo.inc -gen-instr-info)
 tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
 tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
 tablegen(XCoreGenCallingConv.inc -gen-callingconv)
-tablegen(XCoreGenSubtarget.inc -gen-subtarget)
+tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget)
 
 add_llvm_target(XCoreCodeGen
   XCoreAsmPrinter.cpp
@@ -16,7 +13,6 @@ add_llvm_target(XCoreCodeGen
   XCoreInstrInfo.cpp
   XCoreISelDAGToDAG.cpp
   XCoreISelLowering.cpp
-  XCoreMCAsmInfo.cpp
   XCoreRegisterInfo.cpp
   XCoreSubtarget.cpp
   XCoreTargetMachine.cpp
@@ -25,3 +21,4 @@ add_llvm_target(XCoreCodeGen
   )
 
 add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..c3b3dc9e647d
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMXCoreDesc
+  XCoreMCTargetDesc.cpp
+  XCoreMCAsmInfo.cpp
+  )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/XCore/MCTargetDesc/Makefile b/lib/Target/XCore/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..de61543bfe9c
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 42ab1b31d57a..42ab1b31d57a 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 840392263881..840392263881 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
new file mode 100644
index 000000000000..939d97c9d87c
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCTargetDesc.h"
+#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "XCoreGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createXCoreMCInstrInfo() {
+  MCInstrInfo *InstrInfo = new MCInstrInfo();
+  InitXCoreMCInstrInfo(InstrInfo); // Populate the freshly allocated object.
+  return InstrInfo;
+}
+
+extern "C" void LLVMInitializeXCoreMCInstrInfo() {
+  TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+}
+
+static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
+                                                   StringRef FS) {
+  MCSubtargetInfo *STI = new MCSubtargetInfo();
+  InitXCoreMCSubtargetInfo(STI, TT, CPU, FS); // Populate from triple/CPU/features.
+  return STI;
+}
+
+extern "C" void LLVMInitializeXCoreMCSubtargetInfo() {
+  TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+                                          createXCoreMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeXCoreMCAsmInfo() {
+  RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget);
+}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
new file mode 100644
index 000000000000..3cfc3764a62c
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCTARGETDESC_H
+#define XCOREMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheXCoreTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for XCore registers.  This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "XCoreGenRegisterInfo.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "XCoreGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index 6c1ef886031b..b823c4ed37e9 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMXCoreCodeGen
 TARGET = XCore
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
-                XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \
-                XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \
+BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \
+		XCoreGenAsmWriter.inc \
                 XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
-		XCoreGenSubtarget.inc
+		XCoreGenSubtargetInfo.inc
 
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index 8937fbe123c6..b8fb0cac319b 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_XCORE_H
 #define TARGET_XCORE_H
 
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
@@ -25,17 +26,6 @@ namespace llvm {
 
   FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
 
-  extern Target TheXCoreTarget;
-
 } // end namespace llvm;
 
-// Defines symbolic names for XCore registers.  This defines a mapping from
-// register name to register number.
-//
-#include "XCoreGenRegisterNames.inc"
-
-// Defines symbolic names for the XCore instructions.
-//
-#include "XCoreGenInstrNames.inc"
-
 #endif
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f06dd32662f..1a43714d63b9 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -16,7 +16,6 @@
 #include "XCore.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreSubtarget.h"
-#include "XCoreMCAsmInfo.h"
 #include "XCoreTargetMachine.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -27,6 +26,7 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
@@ -114,7 +114,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   
   MCSymbol *GVSym = Mang->getSymbol(GV);
-  Constant *C = GV->getInitializer();
+  const Constant *C = GV->getInitializer();
   unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
   
   // Mark the start of the global
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 8cabbbf16c35..6d040e052659 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1591,21 +1591,18 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
 //                           XCore Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
-std::vector<unsigned> XCoreTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  EVT VT) const
-{
-  if (Constraint.size() != 1)
-    return std::vector<unsigned>();
-
-  switch (Constraint[0]) {
+std::pair<unsigned, const TargetRegisterClass*>
+XCoreTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             EVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
     default : break;
     case 'r':
-      return make_vector<unsigned>(XCore::R0, XCore::R1,  XCore::R2,
-                                   XCore::R3, XCore::R4,  XCore::R5,
-                                   XCore::R6, XCore::R7,  XCore::R8,
-                                   XCore::R9, XCore::R10, XCore::R11, 0);
-      break;
+      return std::make_pair(0U, XCore::GRRegsRegisterClass);
+    }
   }
-  return std::vector<unsigned>();
+  // Every constraint other than the single letter 'r' is deferred to the
+  // default TargetLowering implementation.
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index a8d67d4ad21e..9c803bef6dd2 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -148,9 +148,9 @@ namespace llvm {
     SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
 
     // Inline asm support
-    std::vector<unsigned>
-    getRegClassForInlineAsmConstraint(const std::string &Constraint,
-              EVT VT) const;
+    std::pair<unsigned, const TargetRegisterClass*>
+    getRegForInlineAsmConstraint(const std::string &Constraint,
+                                 EVT VT) const;
 
     // Expand specifics
     SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 9cb6a7d17b5e..f90481f3fbc9 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -18,11 +18,14 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLocation.h"
-#include "XCoreGenInstrInfo.inc"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 
+#define GET_INSTRINFO_CTOR
+#include "XCoreGenInstrInfo.inc"
+
 namespace llvm {
 namespace XCore {
 
@@ -38,7 +41,7 @@ namespace XCore {
 using namespace llvm;
 
 XCoreInstrInfo::XCoreInstrInfo()
-  : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)),
+  : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
     RI(*this) {
 }
 
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 977fe8dd550a..840b1e163652 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -17,9 +17,12 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "XCoreRegisterInfo.h"
 
+#define GET_INSTRINFO_HEADER
+#include "XCoreGenInstrInfo.inc"
+
 namespace llvm {
 
-class XCoreInstrInfo : public TargetInstrInfoImpl {
+class XCoreInstrInfo : public XCoreGenInstrInfo {
   const XCoreRegisterInfo RI;
 public:
   XCoreInstrInfo();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 46c9e57c1af5..357a4a083582 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -33,11 +33,13 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define GET_REGINFO_TARGET_DESC
+#include "XCoreGenRegisterInfo.inc"
+
 using namespace llvm;
 
 XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
-  : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
-    TII(tii) {
+  : XCoreGenRegisterInfo(), TII(tii) {
 }
 
 // helper functions
@@ -193,7 +195,16 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   #endif
 
   Offset += StackSize;
-  
+
+  unsigned FrameReg = getFrameRegister(MF);
+
+  // Special handling of DBG_VALUE instructions.
+  if (MI.isDebugValue()) {
+    MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+    MI.getOperand(i+1).ChangeToImmediate(Offset);
+    return;
+  }
+
   // fold constant into offset.
   Offset += MI.getOperand(i + 1).getImm();
   MI.getOperand(i + 1).ChangeToImmediate(0);
@@ -205,7 +216,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   Offset/=4;
   
   bool FP = TFI->hasFP(MF);
-  
+
   unsigned Reg = MI.getOperand(0).getReg();
   bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
 
@@ -216,7 +227,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   
   if (FP) {
     bool isUs = isImmUs(Offset);
-    unsigned FramePtr = XCore::R10;
     
     if (!isUs) {
       if (!RS)
@@ -228,18 +238,18 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       switch (MI.getOpcode()) {
       case XCore::LDWFI:
         BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
-              .addReg(FramePtr)
+              .addReg(FrameReg)
               .addReg(ScratchReg, RegState::Kill);
         break;
       case XCore::STWFI:
         BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
               .addReg(Reg, getKillRegState(isKill))
-              .addReg(FramePtr)
+              .addReg(FrameReg)
               .addReg(ScratchReg, RegState::Kill);
         break;
       case XCore::LDAWFI:
         BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
-              .addReg(FramePtr)
+              .addReg(FrameReg)
               .addReg(ScratchReg, RegState::Kill);
         break;
       default:
@@ -249,18 +259,18 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       switch (MI.getOpcode()) {
       case XCore::LDWFI:
         BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
-              .addReg(FramePtr)
+              .addReg(FrameReg)
               .addImm(Offset);
         break;
       case XCore::STWFI:
         BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
               .addReg(Reg, getKillRegState(isKill))
-              .addReg(FramePtr)
+              .addReg(FrameReg)
               .addImm(Offset);
         break;
       case XCore::LDAWFI:
         BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
-              .addReg(FramePtr)
+              .addReg(FrameReg)
               .addImm(Offset);
         break;
       default:
@@ -328,6 +338,3 @@ unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
 unsigned XCoreRegisterInfo::getRARegister() const {
   return XCore::LR;
 }
-
-#include "XCoreGenRegisterInfo.inc"
-
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 7a9bc9fb8705..801d9eba2171 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -15,7 +15,9 @@
 #define XCOREREGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "XCoreGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "XCoreGenRegisterInfo.inc"
 
 namespace llvm {
 
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 09510976dd06..c3542304a4ec 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -44,13 +44,13 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
 //
 def GRRegs : RegisterClass<"XCore", [i32], 32,
   // Return values and arguments
-  [R0, R1, R2, R3,
+  (add R0, R1, R2, R3,
   // Not preserved across procedure calls
   R11,
   // Callee save
-  R4, R5, R6, R7, R8, R9, R10]>;
+  R4, R5, R6, R7, R8, R9, R10)>;
 
 // Reserved
-def RRegs : RegisterClass<"XCore", [i32], 32, [CP, DP, SP, LR]> {
+def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
   let isAllocatable = 0;
 }
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 78a6fa5b2edb..ad069bf138a7 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -7,14 +7,22 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the XCore specific subclass of TargetSubtarget.
+// This file implements the XCore specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #include "XCoreSubtarget.h"
 #include "XCore.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "XCoreGenSubtargetInfo.inc"
+
 using namespace llvm;
 
-XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS)
+XCoreSubtarget::XCoreSubtarget(const std::string &TT,
+                               const std::string &CPU, const std::string &FS)
+  : XCoreGenSubtargetInfo(TT, CPU, FS) // All setup is delegated to the base.
 {
 }
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index f8be3ec86189..7b29fa236710 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -7,32 +7,35 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the XCore specific subclass of TargetSubtarget.
+// This file declares the XCore specific subclass of TargetSubtargetInfo.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef XCORESUBTARGET_H
 #define XCORESUBTARGET_H
 
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetMachine.h"
-
 #include <string>
 
+#define GET_SUBTARGETINFO_HEADER
+#include "XCoreGenSubtargetInfo.inc"
+
 namespace llvm {
+class StringRef;
 
-class XCoreSubtarget : public TargetSubtarget {
+class XCoreSubtarget : public XCoreGenSubtargetInfo {
 
 public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   ///
-  XCoreSubtarget(const std::string &TT, const std::string &FS);
+  XCoreSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS);
   
   /// ParseSubtargetFeatures - Parses features string setting specified 
   /// subtarget options.  Definition of function is auto generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 };
 } // End llvm namespace
 
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 30da2c896c0f..342966ae5c86 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,7 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "XCoreMCAsmInfo.h"
 #include "XCoreTargetMachine.h"
 #include "XCore.h"
 #include "llvm/Module.h"
@@ -21,9 +20,10 @@ using namespace llvm;
 /// XCoreTargetMachine ctor - Create an ILP32 architecture model
 ///
 XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
+                                       const std::string &CPU,
                                        const std::string &FS)
-  : LLVMTargetMachine(T, TT),
-    Subtarget(TT, FS),
+  : LLVMTargetMachine(T, TT, CPU, FS),
+    Subtarget(TT, CPU, FS),
     DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
                "i16:16:32-i32:32:32-i64:32:32-n32"),
     InstrInfo(),
@@ -41,5 +41,4 @@ bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
 // Force static initialization.
 extern "C" void LLVMInitializeXCoreTarget() {
   RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
-  RegisterAsmInfo<XCoreMCAsmInfo> Y(TheXCoreTarget);
 }
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 24daadcb6bf4..6235ac3a6a1a 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
   XCoreSelectionDAGInfo TSInfo;
 public:
   XCoreTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &CPU, const std::string &FS);
 
   virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
   virtual const XCoreFrameLowering *getFrameLowering() const {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 54a7f679e01c..fa007cfc6513 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -493,7 +493,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has modified arguments.
   const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
+  std::vector<Type*> Params;
 
   typedef std::set<IndicesVector> ScalarizeTable;
 
@@ -733,12 +733,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                           AttributesVec.end()));
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                         AttributesVec.end()));
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 179b150c1478..3de7bfceed1b 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipo
   ArgumentPromotion.cpp
   ConstantMerge.cpp
   DeadArgumentElimination.cpp
-  DeadTypeElimination.cpp
   ExtractGV.cpp
   FunctionAttrs.cpp
   GlobalDCE.cpp
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d4eaf0c4a3ec..15177650f4e5 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -208,7 +208,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   // the old function, but doesn't have isVarArg set.
   const FunctionType *FTy = Fn.getFunctionType();
 
-  std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
+  std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
   FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
                                                 Params, false);
   unsigned NumArgs = Params.size();
@@ -244,11 +244,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(PAL);
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(PAL);
       if (cast<CallInst>(Call)->isTailCall())
@@ -647,7 +647,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has fewer arguments and a different return type.
   const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
+  std::vector<Type*> Params;
 
   // Set up to build a new list of parameter attributes.
   SmallVector<AttributeWithIndex, 8> AttributesVec;
@@ -659,13 +659,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Find out the new return value.
 
-  const Type *RetTy = FTy->getReturnType();
+  Type *RetTy = FTy->getReturnType();
   const Type *NRetTy = NULL;
   unsigned RetCount = NumRetVals(F);
 
   // -1 means unused, other numbers are the new index
   SmallVector<int, 5> NewRetIdxs(RetCount, -1);
-  std::vector<const Type*> RetTypes;
+  std::vector<Type*> RetTypes;
   if (RetTy->isVoidTy()) {
     NRetTy = RetTy;
   } else {
@@ -822,11 +822,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(NewCallPAL);
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(NewCallPAL);
       if (cast<CallInst>(Call)->isTailCall())
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
deleted file mode 100644
index d3d4963b63eb..000000000000
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to cleanup the output of GCC.  It eliminate names for types
-// that are unused in the entire translation unit, using the FindUsedTypes pass.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "deadtypeelim"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
-
-namespace {
-  struct DTE : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
-    DTE() : ModulePass(ID) {
-      initializeDTEPass(*PassRegistry::getPassRegistry());
-    }
-
-    // doPassInitialization - For this pass, it removes global symbol table
-    // entries for primitive types.  These are never used for linking in GCC and
-    // they make the output uglier to look at, so we nuke them.
-    //
-    // Also, initialize instance variables.
-    //
-    bool runOnModule(Module &M);
-
-    // getAnalysisUsage - This function needs FindUsedTypes to do its job...
-    //
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<FindUsedTypes>();
-    }
-  };
-}
-
-char DTE::ID = 0;
-INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
-INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
-
-ModulePass *llvm::createDeadTypeEliminationPass() {
-  return new DTE();
-}
-
-
-// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
-// should be eliminated.
-//
-static inline bool ShouldNukeSymtabEntry(const Type *Ty){
-  // Nuke all names for primitive types!
-  if (Ty->isPrimitiveType() || Ty->isIntegerTy()) 
-    return true;
-
-  // Nuke all pointers to primitive types as well...
-  if (const PointerType *PT = dyn_cast<PointerType>(Ty))
-    if (PT->getElementType()->isPrimitiveType() ||
-        PT->getElementType()->isIntegerTy()) 
-      return true;
-
-  return false;
-}
-
-// run - For this pass, it removes global symbol table entries for primitive
-// types.  These are never used for linking in GCC and they make the output
-// uglier to look at, so we nuke them.  Also eliminate types that are never used
-// in the entire program as indicated by FindUsedTypes.
-//
-bool DTE::runOnModule(Module &M) {
-  bool Changed = false;
-
-  TypeSymbolTable &ST = M.getTypeSymbolTable();
-  const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes();
-  std::set<const Type*> UsedTypes(T.begin(), T.end());
-
-  // Check the symbol table for superfluous type entries...
-  //
-  // Grab the 'type' plane of the module symbol...
-  TypeSymbolTable::iterator TI = ST.begin();
-  TypeSymbolTable::iterator TE = ST.end();
-  while ( TI != TE ) {
-    // If this entry should be unconditionally removed, or if we detect that
-    // the type is not used, remove it.
-    const Type *RHS = TI->second;
-    if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
-      ST.remove(TI++);
-      ++NumKilled;
-      Changed = true;
-    } else {
-      ++TI;
-      // We only need to leave one name for each type.
-      UsedTypes.erase(RHS);
-    }
-  }
-
-  return Changed;
-}
-
-// vim: sw=2
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index cdf7b76dd087..4ac721dd0600 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1999,9 +1999,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
 static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
                                           const std::vector<Function*> &Ctors) {
   // If we made a change, reassemble the initializer list.
-  std::vector<Constant*> CSVals;
-  CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
-  CSVals.push_back(0);
+  Constant *CSVals[2];
+  CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+  CSVals[1] = 0;
+
+  const StructType *StructTy =
+    cast <StructType>(
+    cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
 
   // Create the new init list.
   std::vector<Constant*> CAList;
@@ -2016,12 +2020,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
       CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
                                    0x7fffffff);
     }
-    CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
+    CAList.push_back(ConstantStruct::get(StructTy, CSVals));
   }
 
   // Create the array initializer.
-  const Type *StructTy =
-      cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
   Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
                                                    CAList.size()), CAList);
 
@@ -2218,42 +2220,40 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
 
     // Return the modified struct.
-    return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
-                               STy->isPacked());
-  } else {
-    ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
-    const SequentialType *InitTy = cast<SequentialType>(Init->getType());
-
-    uint64_t NumElts;
-    if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
-      NumElts = ATy->getNumElements();
-    else
-      NumElts = cast<VectorType>(InitTy)->getNumElements();
-
+    return ConstantStruct::get(STy, Elts);
+  }
+  
+  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+  const SequentialType *InitTy = cast<SequentialType>(Init->getType());
 
-    // Break up the array into elements.
-    if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
-      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
-        Elts.push_back(cast<Constant>(*i));
-    } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
-      for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
-        Elts.push_back(cast<Constant>(*i));
-    } else if (isa<ConstantAggregateZero>(Init)) {
-      Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
-    } else {
-      assert(isa<UndefValue>(Init) && "This code is out of sync with "
-             " ConstantFoldLoadThroughGEPConstantExpr");
-      Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
-    }
+  uint64_t NumElts;
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+    NumElts = ATy->getNumElements();
+  else
+    NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+  // Break up the array into elements.
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+    for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+      Elts.push_back(cast<Constant>(*i));
+  } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+    for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+      Elts.push_back(cast<Constant>(*i));
+  } else if (isa<ConstantAggregateZero>(Init)) {
+    Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+  } else {
+    assert(isa<UndefValue>(Init) && "This code is out of sync with "
+           " ConstantFoldLoadThroughGEPConstantExpr");
+    Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+  }
 
-    assert(CI->getZExtValue() < NumElts);
-    Elts[CI->getZExtValue()] =
-      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+  assert(CI->getZExtValue() < NumElts);
+  Elts[CI->getZExtValue()] =
+    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
 
-    if (Init->getType()->isArrayTy())
-      return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
-    return ConstantVector::get(Elts);
-  }
+  if (Init->getType()->isArrayTy())
+    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+  return ConstantVector::get(Elts);
 }
 
 /// CommitValueTo - We have decided that Addr (which satisfies the predicate
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 21dcb519d9c9..31ce95f53d33 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -25,7 +25,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeConstantMergePass(Registry);
   initializeDAEPass(Registry);
   initializeDAHPass(Registry);
-  initializeDTEPass(Registry);
   initializeFunctionAttrsPass(Registry);
   initializeGlobalDCEPass(Registry);
   initializeGlobalOptPass(Registry);
@@ -63,10 +62,6 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createDeadArgEliminationPass());
 }
 
-void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createDeadTypeEliminationPass());
-}
-
 void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createFunctionAttrsPass());
 }
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 52ecf17b8f9b..659476b139e4 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -267,7 +267,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
   CastInst* CI = 
     new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
   Value *Args[] = { CI, Inst->getArgOperand(1) };
-  CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
+  CallInst::Create(ThrowLongJmp, Args, "", Inst);
 
   SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
 
@@ -386,7 +386,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
     GetSetJmpMap(Func), BufPtr,
     ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
   };
-  CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst);
+  CallInst::Create(AddSJToMap, Args, "", Inst);
 
   // We are guaranteed that there are no values live across basic blocks
   // (because we are "not in SSA form" yet), but there can still be values live
@@ -482,7 +482,7 @@ void LowerSetJmp::visitCallInst(CallInst& CI)
   std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
   InvokeInst* II =
     InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
-                       Params.begin(), Params.end(), CI.getName(), Term);
+                       Params, CI.getName(), Term);
   II->setCallingConv(CI.getCallingConv());
   II->setAttributes(CI.getAttributes());
 
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index f74144338a61..7796d05b7bc6 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -218,7 +218,6 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
     llvm_unreachable("Unknown type!");
     // Fall through in Release mode.
   case Type::IntegerTyID:
-  case Type::OpaqueTyID:
   case Type::VectorTyID:
     // Ty1 == Ty2 would have returned true earlier.
     return false;
@@ -733,7 +732,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
     ++i;
   }
 
-  CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
+  CallInst *CI = Builder.CreateCall(F, Args);
   CI->setTailCall();
   CI->setCallingConv(F->getCallingConv());
   if (NewG->getReturnType()->isVoidTy()) {
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 2f3baebf4864..b7e63dc4484c 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -175,8 +175,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
       if (II->doesNotThrow()) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
         // Insert a call instruction before the invoke.
-        CallInst *Call = CallInst::Create(II->getCalledValue(),
-                                          Args.begin(), Args.end(), "", II);
+        CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
         Call->takeName(II);
         Call->setCallingConv(II->getCallingConv());
         Call->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index a69076510806..0fbaff1509a7 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -28,8 +28,8 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ValueSymbolTable.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
@@ -143,8 +143,7 @@ static void RemoveDeadConstant(Constant *C) {
   assert(C->use_empty() && "Constant is not dead!");
   SmallPtrSet<Constant*, 4> Operands;
   for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
-    if (isa<DerivedType>(C->getOperand(i)->getType()) &&
-        OnlyUsedBy(C->getOperand(i), C)) 
+    if (OnlyUsedBy(C->getOperand(i), C)) 
       Operands.insert(cast<Constant>(C->getOperand(i)));
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
     if (!GV->hasLocalLinkage()) return;   // Don't delete non static globals.
@@ -174,13 +173,19 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
   }
 }
 
-// Strip the symbol table of its names.
-static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
-  for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) {
-    if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg"))
-      ++TI;
-    else
-      ST.remove(TI++);
+// Strip struct type names in M; "llvm.dbg"* names stay if PreserveDbgInfo.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+  std::vector<StructType*> StructTypes;
+  M.findUsedStructTypes(StructTypes);  // candidates to rename, walked below
+
+  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+    StructType *STy = StructTypes[i];
+    if (STy->isAnonymous() || STy->getName().empty()) continue; // no name
+    
+    if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+      continue;  // keep names with the "llvm.dbg" prefix
+
+    STy->setName("");  // replace the name with the empty string
+  }
+}
 
@@ -221,7 +226,7 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
   }
   
   // Remove all names from types.
-  StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo);
+  StripTypeNames(M, PreserveDbgInfo);
 
   return true;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a08446e5d519..64ea36fb1e9d 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1400,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
 /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
 /// If so, insert the new bswap intrinsic and return it.
 Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
   if (!ITy || ITy->getBitWidth() % 16 || 
       // ByteMask only allows up to 32-byte values.
       ITy->getBitWidth() > 32*8) 
@@ -1424,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
   for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
     if (ByteValues[i] != V)
       return 0;
-  const Type *Tys[] = { ITy };
   Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
   return CallInst::Create(F, V);
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ef67701921f9..537f2b318aa9 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -217,10 +217,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         if (GVSrc->isConstant()) {
           Module *M = CI.getParent()->getParent()->getParent();
           Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
-                                 CI.getArgOperand(1)->getType(),
-                                 CI.getArgOperand(2)->getType() };
-          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+                           CI.getArgOperand(1)->getType(),
+                           CI.getArgOperand(2)->getType() };
+          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
           Changed = true;
         }
     }
@@ -355,7 +355,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::cttz: {
     // If all bits below the first known one are known zero,
     // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+    const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+    // FIXME: Try to simplify vectors of integers.
+    if (!IT) break;
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
@@ -372,7 +374,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ctlz: {
     // If all bits above the first known one are known zero,
     // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+    const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+    // FIXME: Try to simplify vectors of integers.
+    if (!IT) break;
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
@@ -412,7 +416,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           ConstantInt::getTrue(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        const StructType *ST = cast<StructType>(II->getType());
+        Constant *Struct = ConstantStruct::get(ST, V);
         return InsertValueInst::Create(Struct, Add, 0);
       }
 
@@ -425,7 +430,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(LHS->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        const StructType *ST = cast<StructType>(II->getType());
+        Constant *Struct = ConstantStruct::get(ST, V);
         return InsertValueInst::Create(Struct, Add, 0);
       }
     }
@@ -452,7 +458,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct =
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -472,7 +479,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct = 
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -503,7 +511,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         UndefValue::get(LHS->getType()),
         Builder->getFalse()
       };
-      Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
       return InsertValueInst::Create(Struct, Mul, 0);
     }
   } // FALL THROUGH
@@ -532,7 +540,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+        Constant *Struct = 
+          ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
@@ -1109,13 +1118,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
     NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
-                               II->getUnwindDest(), Args.begin(), Args.end());
+                               II->getUnwindDest(), Args);
     NC->takeName(II);
     cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
     cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
   } else {
     CallInst *CI = cast<CallInst>(Caller);
-    NC = Builder->CreateCall(Callee, Args.begin(), Args.end());
+    NC = Builder->CreateCall(Callee, Args);
     NC->takeName(CI);
     if (CI->isTailCall())
       cast<CallInst>(NC)->setTailCall();
@@ -1178,7 +1187,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
   const AttrListPtr &NestAttrs = NestF->getAttributes();
   if (!NestAttrs.isEmpty()) {
     unsigned NestIdx = 1;
-    const Type *NestTy = 0;
+    Type *NestTy = 0;
     Attributes NestAttr = Attribute::None;
 
     // Look for a parameter marked with the 'nest' attribute.
@@ -1240,7 +1249,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       // Handle this by synthesizing a new function type, equal to FTy
       // with the chain parameter inserted.
 
-      std::vector<const Type*> NewTypes;
+      std::vector<Type*> NewTypes;
       NewTypes.reserve(FTy->getNumParams()+1);
 
       // Insert the chain's type into the list of parameter types, which may
@@ -1280,11 +1289,11 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
         NewCaller = InvokeInst::Create(NewCallee,
                                        II->getNormalDest(), II->getUnwindDest(),
-                                       NewArgs.begin(), NewArgs.end());
+                                       NewArgs);
         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
         cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
       } else {
-        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end());
+        NewCaller = CallInst::Create(NewCallee, NewArgs);
         if (cast<CallInst>(Caller)->isTailCall())
           cast<CallInst>(NewCaller)->setTailCall();
         cast<CallInst>(NewCaller)->
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 199902aa41f8..82c734e0b829 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,6 +30,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
   }
   
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    // Cannot look past anything that might overflow.
+    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+    if (OBI && !OBI->hasNoUnsignedWrap()) {
+      Scale = 1;
+      Offset = 0;
+      return Val;
+    }
+
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
       if (I->getOpcode() == Instruction::Shl) {
         // This is a value scaled by '1 << the shift amt'.
@@ -1208,7 +1216,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
   if (Call && Call->getCalledFunction() &&
       Call->getCalledFunction()->getName() == "sqrt" &&
-      Call->getNumArgOperands() == 1) {
+      Call->getNumArgOperands() == 1 &&
+      Call->hasOneUse()) {
     CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
     if (Arg && Arg->getOpcode() == Instruction::FPExt &&
         CI.getType()->isFloatTy() &&
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c7ed098cbf88..c78760b20692 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -42,13 +42,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
 static bool HasAddOverflow(ConstantInt *Result,
                            ConstantInt *In1, ConstantInt *In2,
                            bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().sgt(In1->getValue());
-    else
-      return Result->getValue().slt(In1->getValue());
-  else
+  if (!IsSigned)  // unsigned case: flagged when Result u< In1
     return Result->getValue().ult(In1->getValue());
+
+  if (In2->isNegative())  // signed, In2 < 0: flagged when Result s> In1
+    return Result->getValue().sgt(In1->getValue());
+  return Result->getValue().slt(In1->getValue());  // signed, In2 >= 0
 }
 
 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result
@@ -77,13 +76,13 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1,
 static bool HasSubOverflow(ConstantInt *Result,
                            ConstantInt *In1, ConstantInt *In2,
                            bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().slt(In1->getValue());
-    else
-      return Result->getValue().sgt(In1->getValue());
-  else
+  if (!IsSigned)  // unsigned case: flagged when Result u> In1
     return Result->getValue().ugt(In1->getValue());
+  
+  if (In2->isNegative())  // signed, In2 < 0: flagged when Result s< In1
+    return Result->getValue().slt(In1->getValue());
+
+  return Result->getValue().sgt(In1->getValue());  // signed, In2 >= 0
 }
 
 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result
@@ -128,8 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
   case ICmpInst::ICMP_UGT:
     // True if LHS u> RHS and RHS == high-bit-mask - 1
     TrueIfSigned = true;
-    return RHS->getValue() ==
-      APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+    return RHS->isMaxValue(true);
   case ICmpInst::ICMP_UGE: 
     // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
     TrueIfSigned = true;
@@ -278,8 +276,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     
     // If this is indexing an array of structures, get the structure element.
     if (!LaterIndices.empty())
-      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
-                                          LaterIndices.size());
+      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
     
     // If the element is masked, handle it.
     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
@@ -828,7 +825,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
         LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
       }
     }
-  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+  } else if (DivRHS->isNegative()) { // Divisor is < 0.
     if (DivI->isExact())
       RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
     if (CmpRHSV == 0) {       // (X / neg) op 0
@@ -1028,7 +1025,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         
         // If the sign bit of the XorCST is not set, there is no change to
         // the operation, just stop using the Xor.
-        if (!XorCST->getValue().isNegative()) {
+        if (!XorCST->isNegative()) {
           ICI.setOperand(0, CompareVal);
           Worklist.Add(LHSI);
           return &ICI;
@@ -1061,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         }
 
         // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
-        if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+        if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
           const APInt &NotSignBit = XorCST->getValue();
           ICmpInst::Predicate Pred = ICI.isSigned()
                                          ? ICI.getUnsignedPredicate()
@@ -1087,22 +1084,33 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // have its sign bit set or if it is an equality comparison. 
         // Extending a relational comparison when we're checking the sign
         // bit would not work.
-        if (Cast->hasOneUse() &&
-            (ICI.isEquality() ||
-             (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
-          uint32_t BitWidth = 
-            cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
-          APInt NewCST = AndCST->getValue().zext(BitWidth);
-          APInt NewCI = RHSV.zext(BitWidth);
-          Value *NewAnd = 
+        if (ICI.isEquality() ||
+            (!AndCST->isNegative() && RHSV.isNonNegative())) {
+          Value *NewAnd =
             Builder->CreateAnd(Cast->getOperand(0),
-                           ConstantInt::get(ICI.getContext(), NewCST),
-                               LHSI->getName());
+                               ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+          NewAnd->takeName(LHSI);
           return new ICmpInst(ICI.getPredicate(), NewAnd,
-                              ConstantInt::get(ICI.getContext(), NewCI));
+                              ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
         }
       }
-      
+
+      // If the LHS is an AND of a zext, and we have an equality compare, we can
+      // shrink the and/compare to the smaller type, eliminating the cast.
+      if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+        const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+        // Make sure we don't compare the upper bits, SimplifyDemandedBits
+        // should fold the icmp to true/false in that case.
+        if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+          Value *NewAnd =
+            Builder->CreateAnd(Cast->getOperand(0),
+                               ConstantExpr::getTrunc(AndCST, Ty));
+          NewAnd->takeName(LHSI);
+          return new ICmpInst(ICI.getPredicate(), NewAnd,
+                              ConstantExpr::getTrunc(RHS, Ty));
+        }
+      }
+
       // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
       // could exist), turn it into (X & (C2 << C1)) != (C3 << C1).  This
       // happens a LOT in code produced by the C front-end, for bitfield
@@ -1396,18 +1404,27 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       case Instruction::Xor:
         // For the xor case, we can xor two constants together, eliminating
         // the explicit xor.
-        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
-          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                               ConstantExpr::getXor(RHS, BOC));
-        
-        // FALLTHROUGH
+        } else if (RHSV == 0) {
+          // Replace ((xor A, B) != 0) with (A != B)
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                              BO->getOperand(1));
+        }
+        break;
       case Instruction::Sub:
-        // Replace (([sub|xor] A, B) != 0) with (A != B)
-        if (RHSV == 0)
+        // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+        if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+          if (BO->hasOneUse())
+            return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+                                ConstantExpr::getSub(BOp0C, RHS));
+        } else if (RHSV == 0) {
+          // Replace ((sub A, B) != 0) with (A != B)
           return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
                               BO->getOperand(1));
+        }
         break;
-        
       case Instruction::Or:
         // If bits are being or'd in that are not present in the constant we
         // are comparing against, then the comparison could never succeed!
@@ -1434,7 +1451,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
             return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
                                 ICmpInst::ICMP_NE, LHSI,
                                 Constant::getNullValue(RHS->getType()));
-          
+
+          // Don't perform the following transforms if the AND has multiple uses
+          if (!BO->hasOneUse())
+            break;
+
           // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
           if (BOC->getValue().isSignBit()) {
             Value *X = BO->getOperand(0);
@@ -1659,9 +1680,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   // result and the overflow bit.
   Module *M = I.getParent()->getParent()->getParent();
   
-  const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+  Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
   Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
-                                       &NewType, 1);
+                                       NewType);
 
   InstCombiner::BuilderTy *Builder = IC.Builder;
   
@@ -1701,8 +1722,8 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
   Builder->SetInsertPoint(OrigAdd);
 
   Module *M = I.getParent()->getParent()->getParent();
-  const Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+  Type *Ty = LHS->getType();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
   CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
   Value *Add = Builder->CreateExtractValue(Call, 0);
 
@@ -2364,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
                                 BO1->getOperand(0));
           }
           
-          if (CI->getValue().isMaxSignedValue()) {
+          if (CI->isMaxValue(true)) {
             ICmpInst::Predicate Pred = I.isSigned()
                                            ? I.getUnsignedPredicate()
                                            : I.getSignedPredicate();
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2d29403097ce..630a6fee3990 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -691,14 +691,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
     bool hasNegative = false;
     for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
       if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
-        if (RHS->getValue().isNegative())
+        if (RHS->isNegative())
           hasNegative = true;
 
     if (hasNegative) {
       std::vector<Constant *> Elts(VWidth);
       for (unsigned i = 0; i != VWidth; ++i) {
         if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
-          if (RHS->getValue().isNegative())
+          if (RHS->isNegative())
             Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
           else
             Elts[i] = RHS;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aeb3c3e880fa..5733c20828c6 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -796,7 +796,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
             // So at this point we know we have (Y -> OtherAddOp):
             //        select C, (add X, Y), (sub X, Z)
             Value *NegVal;  // Compute -Z
-            if (SI.getType()->isFloatingPointTy()) {
+            if (SI.getType()->isFPOrFPVectorTy()) {
               NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
             } else {
               NegVal = Builder->CreateNeg(SubOp->getOperand(1));
@@ -810,7 +810,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
               Builder->CreateSelect(CondVal, NewTrueOp,
                                     NewFalseOp, SI.getName() + ".p");
 
-            if (SI.getType()->isFloatingPointTy())
+            if (SI.getType()->isFPOrFPVectorTy())
               return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
             else
               return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 92c10f5546c0..ab98ef9fccf8 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -785,6 +785,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // getelementptr instructions into a single instruction.
   //
   if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+
+    // If this GEP has only 0 indices, it is the same pointer as
+    // Src. If Src is not a trivial GEP too, don't combine
+    // the indices.
+    if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
+        !Src->hasOneUse())
+      return 0;
+
     // Note that if our source is a gep chain itself that we wait for that
     // chain to be resolved before we perform this transformation.  This
     // avoids us creating a TON of code in some cases.
@@ -1191,7 +1199,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       if (EV.getNumIndices() > 1)
         // Extract the remaining indices out of the constant indexed by the
         // first index
-        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+        return ExtractValueInst::Create(V, EV.getIndices().slice(1));
       else
         return ReplaceInstUsesWith(EV, V);
     }
@@ -1214,7 +1222,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
         // with
         // %E = extractvalue { i32, { i32 } } %A, 0
         return ExtractValueInst::Create(IV->getAggregateOperand(),
-                                        EV.idx_begin(), EV.idx_end());
+                                        EV.getIndices());
     }
     if (exti == exte && insi == inse)
       // Both iterators are at the end: Index lists are identical. Replace
@@ -1232,9 +1240,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // by switching the order of the insert and extract (though the
       // insertvalue should be left in, since it may have other uses).
       Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
-                                                 EV.idx_begin(), EV.idx_end());
+                                                 EV.getIndices());
       return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
-                                     insi, inse);
+                                     ArrayRef<unsigned>(insi, inse));
     }
     if (insi == inse)
       // The insert list is a prefix of the extract list
@@ -1246,7 +1254,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // with
       // %E extractvalue { i32 } { i32 42 }, 0
       return ExtractValueInst::Create(IV->getInsertedValueOperand(), 
-                                      exti, exte);
+                                      ArrayRef<unsigned>(exti, exte));
   }
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
     // We're extracting from an intrinsic, see if we're the only user, which
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index b90221301041..3f2c4123882d 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -561,25 +561,24 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
     Edge += Successors;
   }
 
+  ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
   GlobalVariable *EdgeTableGV =
       new GlobalVariable(
           *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
-          ConstantArray::get(EdgeTableTy,
-                             &EdgeTable[0], Succs.size() * Preds.size()),
+          ConstantArray::get(EdgeTableTy, V),
           "__llvm_gcda_edge_table");
   EdgeTableGV->setUnnamedAddr(true);
   return EdgeTableGV;
 }
 
 Constant *GCOVProfiler::getStartFileFunc() {
-  const Type *Args[] = { Type::getInt8PtrTy(*Ctx) };
   const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
-                                              Args, false);
+                                              Type::getInt8PtrTy(*Ctx), false);
   return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
 }
 
 Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32PtrTy(*Ctx),                  // uint32_t *predecessor
     Type::getInt64PtrTy(*Ctx)->getPointerTo(),  // uint64_t **state_table_row
   };
@@ -589,7 +588,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
 }
 
 Constant *GCOVProfiler::getEmitFunctionFunc() {
-  const Type *Args[2] = {
+  Type *Args[2] = {
     Type::getInt32Ty(*Ctx),    // uint32_t ident
     Type::getInt8PtrTy(*Ctx),  // const char *function_name
   };
@@ -599,7 +598,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
 }
 
 Constant *GCOVProfiler::getEmitArcsFunc() {
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
   };
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 182a43d396c0..75416637db4f 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -376,7 +376,7 @@ namespace llvm {
   public:
     static const StructType *get(LLVMContext& C) {
       return( StructType::get(
-                C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+                TypeBuilder<types::i<32>, xcompile>::get(C), // type
                 TypeBuilder<types::i<32>, xcompile>::get(C), // array size
                 TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
                 NULL));
@@ -1062,7 +1062,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue,
 
     CallInst::Create(
       increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
-      args.begin(), args.end(), "", insertPoint);
+      args, "", insertPoint);
   }
 }
 
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 7435bc37fbe1..445a5b6f6074 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -62,8 +62,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   }
   Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
 
-  CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
-                                        "newargc", InsertPos);
+  CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
 
   // If argc or argv are not available in main, just pass null values in.
   Function::arg_iterator AI;
@@ -134,7 +133,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
 void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
   // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
   // types.
-  const Type *GlobalDtorElems[2] = {
+  Type *GlobalDtorElems[2] = {
     Type::getInt32Ty(Mod->getContext()),
     FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
   };
@@ -164,7 +163,8 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
   GlobalVariable *GlobalDtors = new GlobalVariable(
       *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
       GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
-  dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false));
+                                    
+  dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
   GlobalDtors->setInitializer(ConstantArray::get(
       cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
 }
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index fcf914f8baa0..c223da60e0fa 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMScalarOpts
   LoopUnswitch.cpp
   LowerAtomic.cpp
   MemCpyOptimizer.cpp
+  ObjCARC.cpp
   Reassociate.cpp
   Reg2Mem.cpp
   SCCP.cpp
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 53e46400dca8..cb9b5bebc5c7 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -437,12 +437,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
 
     MemDepResult InstDep = MD->getDependency(Inst);
     
-    // Ignore non-local store liveness.
+    // Ignore any store where we can't find a local dependence.
     // FIXME: cross-block DSE would be fun. :)
-    if (InstDep.isNonLocal() || 
-        // Ignore self dependence, which happens in the entry block of the
-        // function.
-        InstDep.getInst() == Inst)
+    if (InstDep.isNonLocal() || InstDep.isUnknown())
       continue;
      
     // If we're storing the same value back to a pointer that we just
@@ -478,7 +475,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     if (Loc.Ptr == 0)
       continue;
     
-    while (!InstDep.isNonLocal()) {
+    while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
       // Get the memory clobbered by the instruction we depend on.  MemDep will
       // skip any instructions that 'Loc' clearly doesn't interact with.  If we
       // end up depending on a may- or must-aliased load, then we can't optimize
@@ -542,24 +539,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
 /// HandleFree - Handle frees of entire structures whose dependency is a store
 /// to a field of that structure.
 bool DSE::HandleFree(CallInst *F) {
+  bool MadeChange = false;
+
   MemDepResult Dep = MD->getDependency(F);
-  do {
-    if (Dep.isNonLocal()) return false;
-    
+
+  while (!Dep.isNonLocal() && !Dep.isUnknown()) {
     Instruction *Dependency = Dep.getInst();
     if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
-      return false;
+      return MadeChange;
   
     Value *DepPointer =
       GetUnderlyingObject(getStoredPointerOperand(Dependency));
 
     // Check for aliasing.
     if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
-      return false;
+      return MadeChange;
   
     // DCE instructions only used to calculate that store
     DeleteDeadInstruction(Dependency, *MD);
     ++NumFastStores;
+    MadeChange = true;
 
     // Inst's old Dependency is now deleted. Compute the next dependency,
     // which may also be dead, as in
@@ -567,9 +566,9 @@ bool DSE::HandleFree(CallInst *F) {
     //    s[1] = 0; // This has just been deleted.
     //    free(s);
     Dep = MD->getDependency(F);
-  } while (!Dep.isNonLocal());
+  };
   
-  return true;
+  return MadeChange;
 }
 
 /// handleEndBlock - Remove dead stores to stack-allocated locations in the
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 2515fd112c1b..87b7317ad2dd 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -91,6 +91,7 @@ namespace {
     uint32_t nextValueNumber;
 
     Expression create_expression(Instruction* I);
+    Expression create_extractvalue_expression(ExtractValueInst* EI);
     uint32_t lookup_or_add_call(CallInst* C);
   public:
     ValueTable() : nextValueNumber(1) { }
@@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> {
 //                     ValueTable Internal Functions
 //===----------------------------------------------------------------------===//
 
-
 Expression ValueTable::create_expression(Instruction *I) {
   Expression e;
   e.type = I->getType();
@@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) {
        OI != OE; ++OI)
     e.varargs.push_back(lookup_or_add(*OI));
   
-  if (CmpInst *C = dyn_cast<CmpInst>(I))
+  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
     e.opcode = (C->getOpcode() << 8) | C->getPredicate();
-  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
-    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
-         II != IE; ++II)
-      e.varargs.push_back(*II);
   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
          II != IE; ++II)
@@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) {
   return e;
 }
 
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+  assert(EI != 0 && "Not an ExtractValueInst?");
+  Expression e;
+  e.type = EI->getType();
+  e.opcode = 0;
+
+  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+  if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+    // EI might be an extract from one of our recognised intrinsics. If it
+    // is we'll synthesize a semantically equivalent expression instead of
+    // an extract value expression.
+    switch (I->getIntrinsicID()) {
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::uadd_with_overflow:
+        e.opcode = Instruction::Add;
+        break;
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::usub_with_overflow:
+        e.opcode = Instruction::Sub;
+        break;
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow:
+        e.opcode = Instruction::Mul;
+        break;
+      default:
+        break;
+    }
+
+    if (e.opcode != 0) {
+      // Intrinsic recognized. Grab its args to finish building the expression.
+      assert(I->getNumArgOperands() == 2 &&
+             "Expect two args for recognised intrinsics.");
+      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+      return e;
+    }
+  }
+
+  // Not a recognised intrinsic. Fall back to producing an extract value
+  // expression.
+  e.opcode = EI->getOpcode();
+  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+       OI != OE; ++OI)
+    e.varargs.push_back(lookup_or_add(*OI));
+
+  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+         II != IE; ++II)
+    e.varargs.push_back(*II);
+
+  return e;
+}
+
 //===----------------------------------------------------------------------===//
 //                     ValueTable External Functions
 //===----------------------------------------------------------------------===//
@@ -227,21 +275,19 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
     // Non-local case.
     const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
       MD->getNonLocalCallDependency(CallSite(C));
-    // FIXME: call/call dependencies for readonly calls should return def, not
-    // clobber!  Move the checking logic to MemDep!
+    // FIXME: Move the checking logic to MemDep!
     CallInst* cdep = 0;
 
     // Check to see if we have a single dominating call instruction that is
     // identical to C.
     for (unsigned i = 0, e = deps.size(); i != e; ++i) {
       const NonLocalDepEntry *I = &deps[i];
-      // Ignore non-local dependencies.
       if (I->getResult().isNonLocal())
         continue;
 
-      // We don't handle non-depedencies.  If we already have a call, reject
+      // We don't handle non-definitions.  If we already have a call, reject
       // instruction dependencies.
-      if (I->getResult().isClobber() || cdep != 0) {
+      if (!I->getResult().isDef() || cdep != 0) {
         cdep = 0;
         break;
       }
@@ -338,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::ExtractElement:
     case Instruction::InsertElement:
     case Instruction::ShuffleVector:
-    case Instruction::ExtractValue:
     case Instruction::InsertValue:
     case Instruction::GetElementPtr:
       exp = create_expression(I);
       break;
+    case Instruction::ExtractValue:
+      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+      break;
     default:
       valueNumbering[V] = nextValueNumber;
       return nextValueNumber++;
@@ -1192,8 +1240,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
     // escaping uses to any values that are operands to these PHIs.
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
       PHINode *P = NewPHIs[i];
-      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
-        AA->addEscapingUse(P->getOperandUse(2*ii));
+      for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+        unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+        AA->addEscapingUse(P->getOperandUse(jj));
+      }
     }
   }
 
@@ -1224,12 +1274,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
 
   // If we had a phi translation failure, we'll have a single entry which is a
   // clobber in the current block.  Reject this early.
-  if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
-      Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
+  if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
     DEBUG(
       dbgs() << "GVN: non-local load ";
       WriteAsOperand(dbgs(), LI);
-      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+      dbgs() << " has unknown dependencies\n";
     );
     return false;
   }
@@ -1245,6 +1294,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     BasicBlock *DepBB = Deps[i].getBB();
     MemDepResult DepInfo = Deps[i].getResult();
 
+    if (DepInfo.isUnknown()) {
+      UnavailableBlocks.push_back(DepBB);
+      continue;
+    }
+
     if (DepInfo.isClobber()) {
       // The address being loaded in this non-local block may not be the same as
       // the pointer operand of the load if PHI translation occurs.  Make sure
@@ -1305,6 +1359,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
       continue;
     }
 
+    assert(DepInfo.isDef() && "Expecting def here");
+
     Instruction *DepInst = DepInfo.getInst();
 
     // Loading the allocation -> undef.
@@ -1691,10 +1747,22 @@ bool GVN::processLoad(LoadInst *L) {
     return false;
   }
 
+  if (Dep.isUnknown()) {
+    DEBUG(
+      // fast print dep, using operator<< on instruction is too slow.
+      dbgs() << "GVN: load ";
+      WriteAsOperand(dbgs(), L);
+      dbgs() << " has unknown dependence\n";
+    );
+    return false;
+  }
+
   // If it is defined in another block, try harder.
   if (Dep.isNonLocal())
     return processNonLocalLoad(L);
 
+  assert(Dep.isDef() && "Expecting def here");
+
   Instruction *DepInst = Dep.getInst();
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
     Value *StoredVal = DepSI->getValueOperand();
@@ -2133,8 +2201,11 @@ bool GVN::performPRE(Function &F) {
         // Because we have added a PHI-use of the pointer value, it has now
         // "escaped" from alias analysis' perspective.  We need to inform
         // AA of this.
-        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
-          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+        for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+             ++ii) {
+          unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+          VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+        }
         
         if (MD)
           MD->invalidateCachedPointerInfo(Phi);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 04ee7c8ccb3b..dee3d38d72af 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,30 +52,32 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
-STATISTIC(NumRemoved , "Number of aux indvars removed");
-STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted, "Number of canonical indvars added");
-STATISTIC(NumReplaced, "Number of exit values replaced");
-STATISTIC(NumLFTR    , "Number of loop exit tests replaced");
-STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
-
-// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
-// and referenced here.
-namespace llvm {
-  extern bool DisableIVRewrite;
-}
+STATISTIC(NumRemoved     , "Number of aux indvars removed");
+STATISTIC(NumWidened     , "Number of indvars widened");
+STATISTIC(NumInserted    , "Number of canonical indvars added");
+STATISTIC(NumReplaced    , "Number of exit values replaced");
+STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem     , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp     , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+  "disable-iv-rewrite", cl::Hidden,
+  cl::desc("Disable canonical induction variable rewriting"));
 
 namespace {
   class IndVarSimplify : public LoopPass {
@@ -84,12 +86,14 @@ namespace {
     ScalarEvolution *SE;
     DominatorTree   *DT;
     TargetData      *TD;
+
     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
   public:
 
     static char ID; // Pass identification, replacement for typeid
-    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
+    IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+                       Changed(false) {
       initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
     }
 
@@ -101,36 +105,46 @@ namespace {
       AU.addRequired<ScalarEvolution>();
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequiredID(LCSSAID);
-      AU.addRequired<IVUsers>();
+      if (!DisableIVRewrite)
+        AU.addRequired<IVUsers>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(LoopSimplifyID);
       AU.addPreservedID(LCSSAID);
-      AU.addPreserved<IVUsers>();
+      if (!DisableIVRewrite)
+        AU.addPreserved<IVUsers>();
       AU.setPreservesCFG();
     }
 
   private:
+    virtual void releaseMemory() {
+      DeadInsts.clear();
+    }
+
     bool isValidRewrite(Value *FromVal, Value *ToVal);
 
+    void HandleFloatingPointIV(Loop *L, PHINode *PH);
+    void RewriteNonIntegerIVs(Loop *L);
+
+    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
     void SimplifyIVUsers(SCEVExpander &Rewriter);
+    void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+    bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
     void EliminateIVRemainder(BinaryOperator *Rem,
                               Value *IVOperand,
-                              bool IsSigned,
-                              PHINode *IVPhi);
-    void RewriteNonIntegerIVs(Loop *L);
+                              bool IsSigned);
+
+    void SimplifyCongruentIVs(Loop *L);
+
+    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
 
     ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                         PHINode *IndVar,
                                         SCEVExpander &Rewriter);
 
-    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
-    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
     void SinkUnusedInvariants(Loop *L);
-
-    void HandleFloatingPointIV(Loop *L, PHINode *PH);
   };
 }
 
@@ -197,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
   return true;
 }
 
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
-      BackedgeTakenCount->isZero())
-    return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
 
-  if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+  bool isExact = false;
+  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
     return false;
-
-  // Can't rewrite non-branch yet.
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  if (!BI)
+  // See if we can convert this to an int64_t
+  uint64_t UIntVal;
+  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+                           &isExact) != APFloat::opOK || !isExact)
     return false;
-
-  // Special case: If the backedge-taken count is a UDiv, it's very likely a
-  // UDiv that ScalarEvolution produced in order to compute a precise
-  // expression, rather than a UDiv from the user's code. If we can't find a
-  // UDiv in the code with some simple searching, assume the former and forego
-  // rewriting the loop.
-  if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
-    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
-    if (!OrigCond) return false;
-    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
-    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
-    if (R != BackedgeTakenCount) {
-      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
-      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
-      if (L != BackedgeTakenCount)
-        return false;
-    }
-  }
+  IntVal = UIntVal;
   return true;
 }
 
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+///   bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+///   bar((double)i);
 ///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
-  if (!L->getExitingBlock())
-    return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+  unsigned BackEdge     = IncomingEdge^1;
 
-  // Can't rewrite non-branch yet.
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  if (!BI)
-    return 0;
+  // Check incoming value.
+  ConstantFP *InitValueVal =
+    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
 
-  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
-  if (!Cond)
-    return 0;
+  int64_t InitValue;
+  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+    return;
 
-  const Type *Ty = 0;
-  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
-      OI != OE; ++OI) {
-    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
-    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
-    if (!Trunc)
-      continue;
+  // Check IV increment. Reject this PN if increment operation is not
+  // an add or increment value can not be represented by an integer.
+  BinaryOperator *Incr =
+    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
 
-    return Trunc->getSrcTy();
+  // If this is not an add of the PHI with a constantfp, or if the constant fp
+  // is not an integer, bail out.
+  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+  int64_t IncValue;
+  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+    return;
+
+  // Check Incr uses. One user is PN and the other user is an exit condition
+  // used by the conditional terminator.
+  Value::use_iterator IncrUse = Incr->use_begin();
+  Instruction *U1 = cast<Instruction>(*IncrUse++);
+  if (IncrUse == Incr->use_end()) return;
+  Instruction *U2 = cast<Instruction>(*IncrUse++);
+  if (IncrUse != Incr->use_end()) return;
+
+  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
+  // only used by a branch, we can't transform it.
+  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+  if (!Compare)
+    Compare = dyn_cast<FCmpInst>(U2);
+  if (Compare == 0 || !Compare->hasOneUse() ||
+      !isa<BranchInst>(Compare->use_back()))
+    return;
+
+  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+  // We need to verify that the branch actually controls the iteration count
+  // of the loop.  If not, the new IV can overflow and no one will notice.
+  // The branch block must be in the loop and one of the successors must be out
+  // of the loop.
+  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+  if (!L->contains(TheBr->getParent()) ||
+      (L->contains(TheBr->getSuccessor(0)) &&
+       L->contains(TheBr->getSuccessor(1))))
+    return;
+
+
+  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+  // transform it.
+  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+  int64_t ExitValue;
+  if (ExitValueVal == 0 ||
+      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+    return;
+
+  // Find new predicate for integer comparison.
+  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+  switch (Compare->getPredicate()) {
+  default: return;  // Unknown comparison.
+  case CmpInst::FCMP_OEQ:
+  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
   }
-  return Ty;
-}
 
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable.  This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
-                          const SCEV *BackedgeTakenCount,
-                          PHINode *IndVar,
-                          SCEVExpander &Rewriter) {
-  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
-  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  // We convert the floating point induction variable to a signed i32 value if
+  // we can.  This is only safe if the comparison will not overflow in a way
+  // that won't be trapped by the integer equivalent operations.  Check for this
+  // now.
+  // TODO: We could use i64 if it is native and the range requires it.
 
-  // If the exiting block is not the same as the backedge block, we must compare
-  // against the preincremented value, otherwise we prefer to compare against
-  // the post-incremented value.
-  Value *CmpIndVar;
-  const SCEV *RHS = BackedgeTakenCount;
-  if (L->getExitingBlock() == L->getLoopLatch()) {
-    // Add one to the "backedge-taken" count to get the trip count.
-    // If this addition may overflow, we have to be more pessimistic and
-    // cast the induction variable before doing the add.
-    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
-    const SCEV *N =
-      SE->getAddExpr(BackedgeTakenCount,
-                     SE->getConstant(BackedgeTakenCount->getType(), 1));
-    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
-        SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
-      // No overflow. Cast the sum.
-      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
-    } else {
-      // Potential overflow. Cast before doing the add.
-      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                        IndVar->getType());
-      RHS = SE->getAddExpr(RHS,
-                           SE->getConstant(IndVar->getType(), 1));
+  // The start/stride/exit values must all fit in signed i32.
+  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+    return;
+
+  // If not actually striding (add x, 0.0), avoid touching the code.
+  if (IncValue == 0)
+    return;
+
+  // Positive and negative strides have different safety conditions.
+  if (IncValue > 0) {
+    // If we have a positive stride, we require the init to be less than the
+    // exit value and an equality or less than comparison.
+    if (InitValue >= ExitValue ||
+        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+      return;
+
+    uint32_t Range = uint32_t(ExitValue-InitValue);
+    if (NewPred == CmpInst::ICMP_SLE) {
+      // Normalize SLE -> SLT, check for infinite loop.
+      if (++Range == 0) return;  // Range overflows.
     }
 
-    // The BackedgeTaken expression contains the number of times that the
-    // backedge branches to the loop header.  This is one less than the
-    // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+    unsigned Leftover = Range % uint32_t(IncValue);
+
+    // If this is an equality comparison, we require that the strided value
+    // exactly land on the exit value, otherwise the IV condition will wrap
+    // around and do things the fp IV wouldn't.
+    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+        Leftover != 0)
+      return;
+
+    // If the stride would wrap around the i32 before exiting, we can't
+    // transform the IV.
+    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+      return;
+
   } else {
-    // We have to use the preincremented value...
-    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                      IndVar->getType());
-    CmpIndVar = IndVar;
+    // If we have a negative stride, we require the init to be greater than the
+    // exit value and an equality or greater than comparison.
+    if (InitValue <= ExitValue ||
+        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+      return;
+
+    uint32_t Range = uint32_t(InitValue-ExitValue);
+    if (NewPred == CmpInst::ICMP_SGE) {
+      // Normalize SGE -> SGT, check for infinite loop.
+      if (++Range == 0) return;  // Range overflows.
+    }
+
+    unsigned Leftover = Range % uint32_t(-IncValue);
+
+    // If this is an equality comparison, we require that the strided value
+    // exactly land on the exit value, otherwise the IV condition will wrap
+    // around and do things the fp IV wouldn't.
+    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+        Leftover != 0)
+      return;
+
+    // If the stride would wrap around the i32 before exiting, we can't
+    // transform the IV.
+    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+      return;
   }
 
-  // Expand the code for the iteration count.
-  assert(SE->isLoopInvariant(RHS, L) &&
-         "Computed iteration count is not loop invariant!");
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
 
-  // Insert a new icmp_ne or icmp_eq instruction before the branch.
-  ICmpInst::Predicate Opcode;
-  if (L->contains(BI->getSuccessor(0)))
-    Opcode = ICmpInst::ICMP_NE;
-  else
-    Opcode = ICmpInst::ICMP_EQ;
+  // Insert new integer induction variable.
+  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+                      PN->getIncomingBlock(IncomingEdge));
 
-  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
-               << "      LHS:" << *CmpIndVar << '\n'
-               << "       op:\t"
-               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
-               << "      RHS:\t" << *RHS << "\n");
+  Value *NewAdd =
+    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+                              Incr->getName()+".int", Incr);
+  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
 
-  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+                                      ConstantInt::get(Int32Ty, ExitValue),
+                                      Compare->getName());
 
-  Value *OrigCond = BI->getCondition();
-  // It's tempting to use replaceAllUsesWith here to fully replace the old
-  // comparison, but that's not immediately safe, since users of the old
-  // comparison may not be dominated by the new comparison. Instead, just
-  // update the branch to use the new comparison; in the common case this
-  // will make old comparison dead.
-  BI->setCondition(Cond);
-  DeadInsts.push_back(OrigCond);
+  // In the following deletions, PN may become dead and may be deleted.
+  // Use a WeakVH to observe whether this happens.
+  WeakVH WeakPH = PN;
 
-  ++NumLFTR;
-  Changed = true;
-  return Cond;
+  // Delete the old floating point exit comparison.  The branch starts using the
+  // new comparison.
+  NewCompare->takeName(Compare);
+  Compare->replaceAllUsesWith(NewCompare);
+  RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+  // Delete the old floating point increment.
+  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+  RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+  // If the FP induction variable still has uses, this is because something else
+  // in the loop uses its value.  In order to canonicalize the induction
+  // variable, we chose to eliminate the IV and rewrite it in terms of an
+  // int->fp cast.
+  //
+  // We give preference to sitofp over uitofp because it is faster on most
+  // platforms.
+  if (WeakPH) {
+    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+                                 PN->getParent()->getFirstNonPHI());
+    PN->replaceAllUsesWith(Conv);
+    RecursivelyDeleteTriviallyDeadInstructions(PN);
+  }
+
+  Changed = true;  // IR was rewritten; RewriteNonIntegerIVs tests this flag.
+  // Add a new IVUsers entry for the newly-created integer PHI.
+  if (IU) IU->AddUsersIfInteresting(NewPHI);
 }
 
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+  // First step.  Check to see if there are any floating-point recurrences.
+  // If there are, change them into integer recurrences, permitting analysis by
+  // the SCEV routines.
+  //
+  BasicBlock *Header = L->getHeader();
+
+  SmallVector<WeakVH, 8> PHIs;
+  for (BasicBlock::iterator I = Header->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    PHIs.push_back(PN);
+
+  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+      HandleFloatingPointIV(L, PN);
+
+  // If the loop previously had floating-point IV, ScalarEvolution
+  // may not have been able to compute a trip count. Now that we've done some
+  // re-writing, the trip count may be computable.
+  if (Changed)
+    SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
 /// RewriteLoopExitValues - Check to see if this loop has a computable
 /// loop-invariant execution count.  If so, this means that we can compute the
 /// final value of any expressions that are recurrent in the loop, and
@@ -460,29 +580,168 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
   Rewriter.clearInsertPoint();
 }
 
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
-  // First step.  Check to see if there are any floating-point recurrences.
-  // If there are, change them into integer recurrences, permitting analysis by
-  // the SCEV routines.
+//===----------------------------------------------------------------------===//
+//  Rewrite IV users based on a canonical IV.
+//  To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+  // Make a single pass over the IVUsers worklist, eliminating comparisons and
+  // remainders of the IV where possible. IU->end() is re-evaluated on every
+  // iteration because a simplification may add new users to the list (see
+  // EliminateIVRemainder). No IV widening is performed by this loop.
+  for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+    Instruction *UseInst = I->getUser();
+    Value *IVOperand = I->getOperandValToReplace();
+
+    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+      EliminateIVComparison(ICmp, IVOperand);
+      continue;
+    }
+    if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+      bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+      if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+        EliminateIVRemainder(Rem, IVOperand, IsSigned);
+        continue;
+      }
+    }
+  }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables.  Doing so will put expensive
+// polynomial evaluations inside of the loop, and the strength reduction pass
+// currently can only reduce affine polynomials.  For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+  // Loop-invariant values are safe.
+  if (SE->isLoopInvariant(S, L)) return true;
+
+  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+  // to transform them into efficient code.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    return AR->isAffine();
+
+  // A commutative expression is safe if all of its operands are safe.
+  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+         E = Commutative->op_end(); I != E; ++I)
+      if (!isSafe(*I, L, SE)) return false;
+    return true;
+  }
+
+  // A cast is safe if its operand is.
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+    return isSafe(C->getOperand(), L, SE);
+
+  // A udiv is safe if its operands are.
+  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+    return isSafe(UD->getLHS(), L, SE) &&
+           isSafe(UD->getRHS(), L, SE);
+
+  // SCEVUnknown is always safe.
+  if (isa<SCEVUnknown>(S))
+    return true;
+
+  // Nothing else is safe.
+  return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+  // Rewrite all induction variable expressions in terms of the canonical
+  // induction variable.
   //
-  BasicBlock *Header = L->getHeader();
+  // If there were induction variables of other sizes or offsets, manually
+  // add the offsets to the primary induction variable and cast, avoiding
+  // the need for the code evaluation methods to insert induction variables
+  // of different sizes.
+  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+    Value *Op = UI->getOperandValToReplace();
+    const Type *UseTy = Op->getType();
+    Instruction *User = UI->getUser();
 
-  SmallVector<WeakVH, 8> PHIs;
-  for (BasicBlock::iterator I = Header->begin();
-       PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    PHIs.push_back(PN);
+    // Compute the final addrec to expand into code.
+    const SCEV *AR = IU->getReplacementExpr(*UI);
 
-  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
-    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
-      HandleFloatingPointIV(L, PN);
+    // Evaluate the expression out of the loop, if possible.
+    if (!L->contains(UI->getUser())) {
+      const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+      if (SE->isLoopInvariant(ExitVal, L))
+        AR = ExitVal;
+    }
 
-  // If the loop previously had floating-point IV, ScalarEvolution
-  // may not have been able to compute a trip count. Now that we've done some
-  // re-writing, the trip count may be computable.
-  if (Changed)
-    SE->forgetLoop(L);
+    // FIXME: It is an extremely bad idea to indvar substitute anything more
+    // complex than affine induction variables.  Doing so will put expensive
+    // polynomial evaluations inside of the loop, and the strength reduction
+    // pass currently can only reduce affine polynomials.  For now just
+    // disable indvar subst on anything more complex than an affine addrec,
+    // unless it can be expanded to a trivial value.
+    if (!isSafe(AR, L, SE))
+      continue;
+
+    // Determine the insertion point for this user. By default, insert
+    // immediately before the user. The SCEVExpander class will automatically
+    // hoist loop invariants out of the loop. For PHI nodes, there may be
+    // multiple uses, so compute the nearest common dominator for the
+    // incoming blocks.
+    Instruction *InsertPt = User;
+    if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+      for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+        if (PHI->getIncomingValue(i) == Op) {
+          if (InsertPt == User)
+            InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+          else
+            InsertPt =
+              DT->findNearestCommonDominator(InsertPt->getParent(),
+                                             PHI->getIncomingBlock(i))
+                    ->getTerminator();
+        }
+
+    // Now expand it into actual Instructions and patch it into place.
+    Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+                 << "   into = " << *NewVal << "\n");
+
+    if (!isValidRewrite(Op, NewVal)) {
+      DeadInsts.push_back(NewVal);
+      continue;
+    }
+    // Inform ScalarEvolution that this value is changing. The change doesn't
+    // affect its value, but it does potentially affect which use lists the
+    // value will be on after the replacement, which affects ScalarEvolution's
+    // ability to walk use lists and drop dangling pointers when a value is
+    // deleted.
+    SE->forgetValue(User);
+
+    // Patch the new value into place.
+    if (Op->hasName())
+      NewVal->takeName(Op);
+    if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+      NewValI->setDebugLoc(User->getDebugLoc());
+    User->replaceUsesOfWith(Op, NewVal);
+    UI->setOperandValToReplace(NewVal);
+
+    ++NumRemoved;
+    Changed = true;
+
+    // The old value may be dead now.
+    DeadInsts.push_back(Op);
+  }
 }
 
+//===----------------------------------------------------------------------===//
+//  IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
 namespace {
   // Collect information about induction variables that are used by sign/zero
   // extend operations. This information is recorded by CollectExtend and
@@ -493,33 +752,30 @@ namespace {
 
     WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
   };
-  typedef std::map<PHINode *, WideIVInfo> WideIVMap;
 }
 
 /// CollectExtend - Update information about the induction variable that is
 /// extended by this sign or zero extend operation. This is used to determine
 /// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
-                          WideIVMap &IVMap, ScalarEvolution *SE,
-                          const TargetData *TD) {
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+                          ScalarEvolution *SE, const TargetData *TD) {
   const Type *Ty = Cast->getType();
   uint64_t Width = SE->getTypeSizeInBits(Ty);
   if (TD && !TD->isLegalInteger(Width))
     return;
 
-  WideIVInfo &IVInfo = IVMap[Phi];
-  if (!IVInfo.WidestNativeType) {
-    IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
-    IVInfo.IsSigned = IsSigned;
+  if (!WI.WidestNativeType) {
+    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+    WI.IsSigned = IsSigned;
     return;
   }
 
   // We extend the IV to satisfy the sign of its first user, arbitrarily.
-  if (IVInfo.IsSigned != IsSigned)
+  if (WI.IsSigned != IsSigned)
     return;
 
-  if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
-    IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
 }
 
 namespace {
@@ -529,43 +785,45 @@ namespace {
 /// inserting truncs whenever we stop propagating the type.
 ///
 class WidenIV {
+  // Parameters
   PHINode *OrigPhi;
   const Type *WideType;
   bool IsSigned;
 
-  IVUsers *IU;
-  LoopInfo *LI;
-  Loop *L;
+  // Context
+  LoopInfo        *LI;
+  Loop            *L;
   ScalarEvolution *SE;
-  DominatorTree *DT;
-  SmallVectorImpl<WeakVH> &DeadInsts;
+  DominatorTree   *DT;
 
+  // Result
   PHINode *WidePhi;
   Instruction *WideInc;
   const SCEV *WideIncExpr;
+  SmallVectorImpl<WeakVH> &DeadInsts;
 
-  SmallPtrSet<Instruction*,16> Processed;
+  SmallPtrSet<Instruction*,16> Widened;
+  SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
 
 public:
-  WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers,
-          LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree,
+  WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+          ScalarEvolution *SEv, DominatorTree *DTree,
           SmallVectorImpl<WeakVH> &DI) :
     OrigPhi(PN),
-    WideType(IVInfo.WidestNativeType),
-    IsSigned(IVInfo.IsSigned),
-    IU(IUsers),
+    WideType(WI.WidestNativeType),
+    IsSigned(WI.IsSigned),
     LI(LInfo),
     L(LI->getLoopFor(OrigPhi->getParent())),
     SE(SEv),
     DT(DTree),
-    DeadInsts(DI),
     WidePhi(0),
     WideInc(0),
-    WideIncExpr(0) {
+    WideIncExpr(0),
+    DeadInsts(DI) {
     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
   }
 
-  bool CreateWideIV(SCEVExpander &Rewriter);
+  PHINode *CreateWideIV(SCEVExpander &Rewriter);
 
 protected:
   Instruction *CloneIVUser(Instruction *NarrowUse,
@@ -574,58 +832,13 @@ protected:
 
   const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
 
-  Instruction *WidenIVUse(Instruction *NarrowUse,
-                          Instruction *NarrowDef,
+  Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
                           Instruction *WideDef);
+
+  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
 };
 } // anonymous namespace
 
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
-  WideIVMap IVMap;
-
-  // Each round of simplification involves a round of eliminating operations
-  // followed by a round of widening IVs. A single IVUsers worklist is used
-  // across all rounds. The inner loop advances the user. If widening exposes
-  // more uses, then another pass through the outer loop is triggered.
-  for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
-    for(; I != E; ++I) {
-      Instruction *UseInst = I->getUser();
-      Value *IVOperand = I->getOperandValToReplace();
-
-      if (DisableIVRewrite) {
-        if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
-          bool IsSigned = Cast->getOpcode() == Instruction::SExt;
-          if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
-            CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
-            continue;
-          }
-        }
-      }
-      if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
-        EliminateIVComparison(ICmp, IVOperand);
-        continue;
-      }
-      if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
-        bool IsSigned = Rem->getOpcode() == Instruction::SRem;
-        if (IsSigned || Rem->getOpcode() == Instruction::URem) {
-          EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
-          continue;
-        }
-      }
-    }
-    for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
-         I != E; ++I) {
-      WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
-      if (Widener.CreateWideIV(Rewriter))
-        Changed = true;
-    }
-  }
-}
-
 static Value *getExtend( Value *NarrowOper, const Type *WideType,
                                bool IsSigned, IRBuilder<> &Builder) {
   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
@@ -671,34 +884,16 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
                                                     LHS, RHS,
                                                     NarrowBO->getName());
     Builder.Insert(WideBO);
-    if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
-    if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-
+    if (const OverflowingBinaryOperator *OBO =
+        dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+      if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+      if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+    }
     return WideBO;
   }
   llvm_unreachable(0);
 }
 
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
-  if (!SE->isSCEVable(NarrowUse->getType()))
-    return 0;
-
-  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
-  const SCEV *WideExpr = IsSigned ?
-    SE->getSignExtendExpr(NarrowExpr, WideType) :
-    SE->getZeroExtendExpr(NarrowExpr, WideType);
-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
-  if (!AddRec || AddRec->getLoop() != L)
-    return 0;
-
-  return AddRec;
-}
-
 /// HoistStep - Attempt to hoist an IV increment above a potential use.
 ///
 /// To successfully hoist, two criteria must be met:
@@ -733,18 +928,41 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
   return true;
 }
 
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening its type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+  if (!SE->isSCEVable(NarrowUse->getType()))
+    return 0;
+
+  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+  if (SE->getTypeSizeInBits(NarrowExpr->getType())
+      >= SE->getTypeSizeInBits(WideType)) {
+    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+    // index. So don't follow this use.
+    return 0;
+  }
+
+  const SCEV *WideExpr = IsSigned ?
+    SE->getSignExtendExpr(NarrowExpr, WideType) :
+    SE->getZeroExtendExpr(NarrowExpr, WideType);
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+  if (!AddRec || AddRec->getLoop() != L)
+    return 0;
+
+  return AddRec;
+}
+
 /// WidenIVUse - Determine whether an individual user of the narrow IV can be
 /// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
-                                 Instruction *NarrowDef,
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
                                  Instruction *WideDef) {
-  // To be consistent with IVUsers, stop traversing the def-use chain at
-  // inner-loop phis or post-loop phis.
-  if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
-    return 0;
+  Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
 
-  // Handle data flow merges and bizarre phi cycles.
-  if (!Processed.insert(NarrowUse))
+  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+  if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
     return 0;
 
   // Our raison d'etre! Eliminate sign and zero extension.
@@ -755,7 +973,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
       if (CastWidth < IVWidth) {
         // The cast isn't as wide as the IV, so insert a Trunc.
-        IRBuilder<> Builder(NarrowUse);
+        IRBuilder<> Builder(NarrowDefUse);
         NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
       }
       else {
@@ -775,23 +993,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
       NarrowUse->replaceAllUsesWith(NewDef);
       DeadInsts.push_back(NarrowUse);
     }
-    // Now that the extend is gone, expose it's uses to IVUsers for potential
-    // further simplification within SimplifyIVUsers.
-    IU->AddUsersIfInteresting(WideDef, WidePhi);
+    // Now that the extend is gone, we want to expose its uses for potential
+    // further simplification. We don't need to directly inform SimplifyIVUsers
+    // of the new users, because their parent IV will be processed later as a
+    // new loop phi. If we preserved IVUsers analysis, we would also want to
+    // push the uses of WideDef here.
 
     // No further widening is needed. The deceased [sz]ext had done it for us.
     return 0;
   }
+
+  // Does this user itself evaluate to a recurrence after widening?
   const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
   if (!WideAddRec) {
     // This user does not evaluate to a recurence after widening, so don't
     // follow it. Instead insert a Trunc to kill off the original use,
     // eventually isolating the original narrow IV so it can be removed.
-    IRBuilder<> Builder(NarrowUse);
+    IRBuilder<> Builder(NarrowDefUse);
     Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
     NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
     return 0;
   }
+  // We assume that block terminators are not SCEVable. We wouldn't want to
+  // insert a Trunc after a terminator if there happens to be a critical edge.
+  assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+         "SCEV is not expected to evaluate a block terminator");
+
   // Reuse the IV increment that SCEVExpander created as long as it dominates
   // NarrowUse.
   Instruction *WideUse = 0;
@@ -803,11 +1030,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
     if (!WideUse)
       return 0;
   }
-  // GetWideRecurrence ensured that the narrow expression could be extended
-  // outside the loop without overflow. This suggests that the wide use
+  // Evaluation of WideAddRec ensured that the narrow expression could be
+  // extended outside the loop without overflow. This suggests that the wide use
   // evaluates to the same expression as the extended narrow use, but doesn't
   // absolutely guarantee it. Hence the following failsafe check. In rare cases
-  // where it fails, we simple throw away the newly created wide use.
+  // where it fails, we simply throw away the newly created wide use.
   if (WideAddRec != SE->getSCEV(WideUse)) {
     DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
           << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
@@ -819,21 +1046,36 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
   return WideUse;
 }
 
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+  for (Value::use_iterator UI = NarrowDef->use_begin(),
+         UE = NarrowDef->use_end(); UI != UE; ++UI) {
+    Use &U = UI.getUse();
+
+    // Handle data flow merges and bizarre phi cycles.
+    if (!Widened.insert(cast<Instruction>(U.getUser())))
+      continue;
+
+    NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+  }
+}
+
 /// CreateWideIV - Process a single induction variable. First use the
 /// SCEVExpander to create a wide induction variable that evaluates to the same
 /// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
 /// interesting IV users, the narrow IV will be isolated for removal by
 /// DeleteDeadPHIs.
 ///
 /// It would be simpler to delete uses as they are processed, but we must avoid
 /// invalidating SCEV expressions.
 ///
-bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   // Is this phi an induction variable?
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
   if (!AddRec)
-    return false;
+    return NULL;
 
   // Widen the induction variable expression.
   const SCEV *WideIVExpr = IsSigned ?
@@ -846,9 +1088,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   // Can the IV be extended outside the loop without overflow?
   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
   if (!AddRec || AddRec->getLoop() != L)
-    return false;
+    return NULL;
 
-  // An AddRec must have loop-invariant operands. Since this AddRec it
+  // An AddRec must have loop-invariant operands. Since this AddRec is
   // materialized by a loop header phi, the expression cannot have any post-loop
   // operands, so they must dominate the loop header.
   assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
@@ -876,39 +1118,37 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   ++NumWidened;
 
   // Traverse the def-use chain using a worklist starting at the original IV.
-  assert(Processed.empty() && "expect initial state" );
+  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+  Widened.insert(OrigPhi);
+  pushNarrowIVUsers(OrigPhi, WidePhi);
 
-  // Each worklist entry has a Narrow def-use link and Wide def.
-  SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
-  for (Value::use_iterator UI = OrigPhi->use_begin(),
-         UE = OrigPhi->use_end(); UI != UE; ++UI) {
-    NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
-  }
   while (!NarrowIVUsers.empty()) {
-    Use *NarrowDefUse;
+    Use *UsePtr;
     Instruction *WideDef;
-    tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+    tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+    Use &NarrowDefUse = *UsePtr;
 
     // Process a def-use edge. This may replace the use, so don't hold a
     // use_iterator across it.
-    Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
-    Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
-    Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+    Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+    Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
 
     // Follow all def-use edges from the previous narrow use.
-    if (WideUse) {
-      for (Value::use_iterator UI = NarrowUse->use_begin(),
-             UE = NarrowUse->use_end(); UI != UE; ++UI) {
-        NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
-      }
-    }
+    if (WideUse)
+      pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+
     // WidenIVUse may have removed the def-use edge.
     if (NarrowDef->use_empty())
       DeadInsts.push_back(NarrowDef);
   }
-  return true;
+  return WidePhi;
 }
 
+//===----------------------------------------------------------------------===//
+//  Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
 void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
   unsigned IVOperIdx = 0;
   ICmpInst::Predicate Pred = ICmp->getPredicate();
@@ -945,8 +1185,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
 
 void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
                                           Value *IVOperand,
-                                          bool IsSigned,
-                                          PHINode *IVPhi) {
+                                          bool IsSigned) {
   // We're only interested in the case where we know something about
   // the numerator.
   if (IVOperand != Rem->getOperand(0))
@@ -989,15 +1228,465 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
   }
 
   // Inform IVUsers about the new users.
-  if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
-    IU->AddUsersIfInteresting(I, IVPhi);
-
+  if (IU) {
+    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+      IU->AddUsersIfInteresting(I);
+  }
   DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
   Changed = true;
   DeadInsts.push_back(Rem);
 }
 
+/// EliminateIVUser - Simplify UseInst, a user of the simple IV IVOperand. True
+/// means it went to the icmp/rem simplifiers or was replaced as an identity.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+                                     Instruction *IVOperand) {
+  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(UseInst)) {
+    EliminateIVComparison(Cmp, IVOperand);
+    return true;
+  }
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(UseInst)) {
+    unsigned Opc = BinOp->getOpcode();
+    if (Opc == Instruction::SRem || Opc == Instruction::URem) {
+      EliminateIVRemainder(BinOp, IVOperand, Opc == Instruction::SRem);
+      return true;
+    }
+  }
+  // Anything else is removed only when SCEV proves it is an identity
+  // function of IVOperand: same type and same SCEV expression.
+  if (!SE->isSCEVable(UseInst->getType()))
+    return false;
+  if (UseInst->getType() != IVOperand->getType())
+    return false;
+  if (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))
+    return false;
+  DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+  UseInst->replaceAllUsesWith(IVOperand);
+  ++NumElimIdentity;
+  Changed = true;
+  DeadInsts.push_back(UseInst);
+  return true;
+}
+
+/// pushIVUsers - Queue each not-yet-visited user of Def, paired with Def, on
+/// the current IV's worklist.
+static void pushIVUsers(
+  Instruction *Def,
+  SmallPtrSet<Instruction*,16> &Simplified,
+  SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+  for (Value::use_iterator UseIt = Def->use_begin(), UseEnd = Def->use_end();
+       UseIt != UseEnd; ++UseIt) {
+    Instruction *UserInst = cast<Instruction>(*UseIt);
+    // If Def is a LoopPhi it may not be in the Simplified set itself, so
+    // filter out self edges before touching the set.
+    if (UserInst == Def)
+      continue;
+    // Inserting into Simplified guarantees unique worklist users and avoids
+    // infinite or exponential worklist processing.
+    if (Simplified.insert(UserInst))
+      SimpleIVUsers.push_back(std::make_pair(UserInst, Def));
+  }
+}
+
+/// isSimpleIVUser - Return true if SCEV models this instruction as an
+/// add-recurrence on loop L, i.e. as a simple expression of that loop's IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+  if (SE->isSCEVable(I->getType())) {
+    // Look up the symbolic expression for this instruction.
+    const SCEV *Expr = SE->getSCEV(I);
+
+    // We assume that terminators are not SCEVable.
+    assert((!Expr || I != I->getParent()->getTerminator()) &&
+           "can't fold terminators");
+
+    // Accept only recurrences that belong to L itself; the code does not
+    // additionally check that the recurrence is affine.
+    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr))
+      return AddRec->getLoop() == L;
+  }
+
+  // Not SCEVable, or not a recurrence.
+  return false;
+}
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encounters a clear
+/// optimization opportunity. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+  SmallVector<std::pair<PHINode*, WideIVInfo>, 8> WideIVs;
+
+  SmallVector<PHINode*, 8> LoopPhis;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    LoopPhis.push_back(cast<PHINode>(I));
+  }
+  // Each round of simplification iterates through the SimplifyIVUsers worklist
+  // for all current phis, then determines whether any IVs can be
+  // widened. Widening adds new phis to LoopPhis, inducing another round of
+  // simplification on the wide IVs.
+  while (!LoopPhis.empty()) {
+    // Evaluate as many IV expressions as possible before widening any IVs. This
+    // forces SCEV to set no-wrap flags before evaluating sign/zero
+    // extension. The first time SCEV attempts to normalize sign/zero extension,
+    // the result becomes final. So for the most predictable results, we delay
+    // evaluation of sign/zero extend evaluation until needed, and avoid running
+    // other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
+    do {
+      PHINode *CurrIV = LoopPhis.pop_back_val();
+
+      // Information about sign/zero extensions of CurrIV.
+      WideIVInfo WI;
+
+      // Instructions processed by SimplifyIVUsers for CurrIV.
+      SmallPtrSet<Instruction*,16> Simplified;
+
+      // Use-def pairs of IV users waiting to be processed for CurrIV.
+      SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+      // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+      // called multiple times for the same LoopPhi. This is the proper thing to
+      // do for loop header phis that use each other.
+      pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+      while (!SimpleIVUsers.empty()) {
+        Instruction *UseInst, *Operand;
+        tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+        // Bypass back edges to avoid extra work.
+        if (UseInst == CurrIV) continue;
+
+        if (EliminateIVUser(UseInst, Operand)) {
+          pushIVUsers(Operand, Simplified, SimpleIVUsers);
+          continue;
+        }
+        if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+          bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+          if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+            CollectExtend(Cast, IsSigned, WI, SE, TD);
+          }
+          continue;
+        }
+        if (isSimpleIVUser(UseInst, L, SE)) {
+          pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+        }
+      }
+      // Queue in discovery order so widening is independent of pointer values.
+      if (WI.WidestNativeType)
+        WideIVs.push_back(std::make_pair(CurrIV, WI));
+    } while(!LoopPhis.empty());
+
+    for (unsigned Idx = 0, NumWide = WideIVs.size(); Idx != NumWide; ++Idx) {
+      WidenIV Widener(WideIVs[Idx].first, WideIVs[Idx].second,
+                      LI, SE, DT, DeadInsts);
+      if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+        Changed = true;
+        LoopPhis.push_back(WidePhi);
+      }
+    }
+    WideIVs.clear();
+  }
+}
+
+/// SimplifyCongruentIVs - Find loop header phis whose SCEV matches that of an
+/// earlier header phi, redirect their users to the earlier phi, and queue
+/// the redundant phi (plus its increment, when safe) on DeadInsts.
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    PHINode *Phi = cast<PHINode>(I);
+    if (!SE->isSCEVable(Phi->getType()))
+      continue;
+    const SCEV *S = SE->getSCEV(Phi);
+    DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+    bool Inserted;
+    tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+    if (Inserted)
+      continue;
+    PHINode *OrigPhi = Pos->second;
+    // RAUW requires identical types; don't assume equal SCEVs guarantee that.
+    if (OrigPhi->getType() != Phi->getType())
+      continue;
+    // Replacing the phi is sufficient (CSE/GVN can handle the rest), but also
+    // eagerly fold the common case of a single isomorphic IV increment.
+    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+      // The incoming latch values are not guaranteed to be instructions.
+      Instruction *OrigInc =
+        dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+      Instruction *IsomorphicInc =
+        dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+      if (OrigInc && IsomorphicInc && OrigInc != IsomorphicInc &&
+          SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+          HoistStep(OrigInc, IsomorphicInc, DT)) {
+        DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+              << *IsomorphicInc << '\n');
+        IsomorphicInc->replaceAllUsesWith(OrigInc);
+        DeadInsts.push_back(IsomorphicInc);
+      }
+    }
+    DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+    ++NumElimIV;
+    Phi->replaceAllUsesWith(OrigPhi);
+    DeadInsts.push_back(Phi);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count can be expanded safely and cheaply into the instruction sequence
+/// that LinearFunctionTestReplace needs.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+  const SCEV *BECount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BECount))
+    return false;
+  if (BECount->isZero())
+    return false;
+
+  // A single exiting block that ends in a branch is required; other
+  // terminators can't be rewritten yet.
+  BasicBlock *Exiting = L->getExitingBlock();
+  if (!Exiting)
+    return false;
+  BranchInst *BI = dyn_cast<BranchInst>(Exiting->getTerminator());
+  if (!BI)
+    return false;
+
+  if (!isa<SCEVUDivExpr>(BECount))
+    return true;
+
+  // Special case: a UDiv backedge-taken count was most likely produced by
+  // ScalarEvolution to compute a precise expression rather than written in
+  // the user's code. Unless simple searching finds it in the exit compare
+  // (as either operand minus one), assume the former and forego rewriting.
+  ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!OrigCond)
+    return false;
+
+  const SCEV *RHS = SE->getSCEV(OrigCond->getOperand(1));
+  if (SE->getMinusSCEV(RHS, SE->getConstant(RHS->getType(), 1)) == BECount)
+    return true;
+  const SCEV *LHS = SE->getSCEV(OrigCond->getOperand(0));
+  return SE->getMinusSCEV(LHS, SE->getConstant(LHS->getType(), 1)) == BECount;
+}
+
+/// getBackedgeIVType - Peek through a Trunc feeding the loop's exit test and
+/// return the wider source type of the first truncated compare operand.
+/// Returns null if the loop lacks a single exiting block, that block does not
+/// end in a branch on an icmp, or neither icmp operand is a Trunc.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+  BasicBlock *ExitingBB = L->getExitingBlock();
+  if (!ExitingBB)
+    return 0;
+
+  // Can't rewrite non-branch yet.
+  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+  if (!BI)
+    return 0;
+
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return 0;
+
+  // The first operand that is a Trunc determines the result. No type is
+  // tracked for non-Trunc operands, so the fallback is always null.
+  for (User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+       OI != OE; ++OI) {
+    if (TruncInst *Trunc = dyn_cast<TruncInst>(*OI))
+      return Trunc->getSrcTy();
+  }
+  return 0;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != (or ==) comparison of the induction variable, or
+/// its incremented value, against the expanded iteration count. It returns the
+/// new compare and queues the old condition on DeadInsts. The loop must
+/// satisfy canExpandBackedgeTakenCount (asserted on entry).
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+                          const SCEV *BackedgeTakenCount,
+                          PHINode *IndVar,
+                          SCEVExpander &Rewriter) {
+  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+
+  // If the exiting block is not the same as the backedge block, we must compare
+  // against the preincremented value, otherwise we prefer to compare against
+  // the post-incremented value.
+  Value *CmpIndVar;
+  const SCEV *RHS = BackedgeTakenCount;
+  if (L->getExitingBlock() == L->getLoopLatch()) {
+    // Add one to the "backedge-taken" count to get the trip count.
+    // If this addition may overflow, we have to be more pessimistic and
+    // cast the induction variable before doing the add.
+    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+    const SCEV *N =
+      SE->getAddExpr(BackedgeTakenCount,
+                     SE->getConstant(BackedgeTakenCount->getType(), 1));
+    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+        SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+      // No overflow. Cast the sum.
+      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+    } else {
+      // Potential overflow. Cast before doing the add.
+      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                        IndVar->getType());
+      RHS = SE->getAddExpr(RHS,
+                           SE->getConstant(IndVar->getType(), 1));
+    }
+
+    // The BackedgeTaken expression contains the number of times that the
+    // backedge branches to the loop header.  This is one less than the
+    // number of times the loop executes, so use the incremented indvar.
+    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+  } else {
+    // We have to use the preincremented value...
+    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                      IndVar->getType());
+    CmpIndVar = IndVar;
+  }
+
+  // Expand the code for the iteration count, inserting it before the branch.
+  assert(SE->isLoopInvariant(RHS, L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+  // Insert a new icmp before the branch: ne if successor 0 is in-loop, else eq.
+  ICmpInst::Predicate Opcode;
+  if (L->contains(BI->getSuccessor(0)))
+    Opcode = ICmpInst::ICMP_NE;
+  else
+    Opcode = ICmpInst::ICMP_EQ;
+
+  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+               << "      LHS:" << *CmpIndVar << '\n'
+               << "       op:\t"
+               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+               << "      RHS:\t" << *RHS << "\n");
+
+  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+  Cond->setDebugLoc(BI->getDebugLoc());
+  Value *OrigCond = BI->getCondition();
+  // It's tempting to use replaceAllUsesWith here to fully replace the old
+  // comparison, but that's not immediately safe, since users of the old
+  // comparison may not be dominated by the new comparison. Instead, just
+  // update the branch to use the new comparison; in the common case this
+  // will make old comparison dead.
+  BI->setCondition(Cond);
+  DeadInsts.push_back(OrigCond);
+
+  ++NumLFTR;
+  Changed = true;
+  return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+//  SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// SinkUnusedInvariants - If the loop has a single exit block and a preheader,
+/// move preheader instructions that have no uses in the preheader or the loop
+/// into the exit block to reduce register pressure in the loop.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+  BasicBlock *ExitBlock = L->getExitBlock();
+  if (!ExitBlock) return;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) return;
+
+  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+  BasicBlock::iterator I = Preheader->getTerminator();
+  while (I != Preheader->begin()) {
+    --I;
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
+      break;
+
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop.  Also don't
+    // move instructions which might read memory, since the loop may modify
+    // memory. Note that it's okay if the instruction might have undefined
+    // behavior: LoopSimplify guarantees that the preheader dominates the exit
+    // block.
+    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+      continue;
+
+    // Skip debug info intrinsics.
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
+
+    // Don't sink static AllocaInsts out of the entry block, which would
+    // turn them into dynamic allocas!
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (AI->isStaticAlloca())
+        continue;
+
+    // Determine if there is a use in or before the loop (direct or
+    // otherwise). A use by a phi counts as a use in its incoming block.
+    bool UsedInLoop = false;
+    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+         UI != UE; ++UI) {
+      User *U = *UI;
+      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+      if (PHINode *P = dyn_cast<PHINode>(U)) {
+        unsigned i =
+          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+        UseBB = P->getIncomingBlock(i);
+      }
+      if (UseBB == Preheader || L->contains(UseBB)) {
+        UsedInLoop = true;
+        break;
+      }
+    }
+
+    // If there is, the def must remain in the preheader.
+    if (UsedInLoop)
+      continue;
+
+    // Otherwise, sink it to the exit block.
+    Instruction *ToMove = I;
+    bool Done = false;
+
+    if (I != Preheader->begin()) {
+      // Step I back past debug info intrinsics before ToMove leaves the block.
+      do {
+        --I;
+      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+        Done = true;
+    } else {
+      Done = true;
+    }
+
+    ToMove->moveBefore(InsertPt);
+    if (Done) break;
+    InsertPt = ToMove;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -1010,7 +1699,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (!L->isLoopSimplifyForm())
     return false;
 
-  IU = &getAnalysis<IVUsers>();
+  if (!DisableIVRewrite)
+    IU = &getAnalysis<IVUsers>();
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
   DT = &getAnalysis<DominatorTree>();
@@ -1026,9 +1716,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
 
   // Create a rewriter object which we'll use to transform the code with.
-  SCEVExpander Rewriter(*SE);
-  if (DisableIVRewrite)
+  SCEVExpander Rewriter(*SE, "indvars");
+
+  // Eliminate redundant IV users.
+  //
+  // Simplification works best when run before other consumers of SCEV. We
+  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+  // other expressions involving loop IVs have been evaluated. This helps SCEV
+  // set no-wrap flags before normalizing sign/zero extension.
+  if (DisableIVRewrite) {
     Rewriter.disableCanonicalMode();
+    SimplifyIVUsersNoRewrite(L, Rewriter);
+  }
 
   // Check to see if this loop has a computable loop-invariant execution count.
   // If so, this means that we can compute the final value of any expressions
@@ -1040,7 +1739,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
     RewriteLoopExitValues(L, Rewriter);
 
   // Eliminate redundant IV users.
-  SimplifyIVUsers(Rewriter);
+  if (!DisableIVRewrite)
+    SimplifyIVUsers(Rewriter);
+
+  // Eliminate redundant IV cycles.
+  if (DisableIVRewrite)
+    SimplifyCongruentIVs(L);
 
   // Compute the type of the largest recurrence expression, and decide whether
   // a canonical induction variable should be inserted.
@@ -1119,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
            "canonical IV disrupted BackedgeTaken expansion");
     assert(NeedCannIV &&
            "LinearFunctionTestReplace requires a canonical induction variable");
-    NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
-                                        Rewriter);
+    // Check preconditions for proper SCEVExpander operation. SCEV does not
+    // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+    // pass that uses the SCEVExpander must do it. This does not work well for
+    // loop passes because SCEVExpander makes assumptions about all loops, while
+    // LoopPassManager only forces the current loop to be simplified.
+    //
+    // FIXME: SCEV expansion has no way to bail out, so the caller must
+    // explicitly check any assumptions made by SCEV. Brittle.
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+    if (!AR || AR->getLoop()->getLoopPreheader())
+      NewICmp =
+        LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
   }
   // Rewrite IV-derived expressions.
   if (!DisableIVRewrite)
@@ -1146,9 +1860,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   // For completeness, inform IVUsers of the IV use in the newly-created
   // loop exit test instruction.
-  if (NewICmp)
-    IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)),
-                              IndVar);
+  if (NewICmp && IU)
+    IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
 
   // Clean up dead instructions.
   Changed |= DeleteDeadPHIs(L->getHeader());
@@ -1156,428 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
   return Changed;
 }
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables.  Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials.  For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
-  // Loop-invariant values are safe.
-  if (SE->isLoopInvariant(S, L)) return true;
-
-  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
-  // to transform them into efficient code.
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
-    return AR->isAffine();
-
-  // An add is safe it all its operands are safe.
-  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
-    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
-         E = Commutative->op_end(); I != E; ++I)
-      if (!isSafe(*I, L, SE)) return false;
-    return true;
-  }
-
-  // A cast is safe if its operand is.
-  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
-    return isSafe(C->getOperand(), L, SE);
-
-  // A udiv is safe if its operands are.
-  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
-    return isSafe(UD->getLHS(), L, SE) &&
-           isSafe(UD->getRHS(), L, SE);
-
-  // SCEVUnknown is always safe.
-  if (isa<SCEVUnknown>(S))
-    return true;
-
-  // Nothing else is safe.
-  return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
-  // Rewrite all induction variable expressions in terms of the canonical
-  // induction variable.
-  //
-  // If there were induction variables of other sizes or offsets, manually
-  // add the offsets to the primary induction variable and cast, avoiding
-  // the need for the code evaluation methods to insert induction variables
-  // of different sizes.
-  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
-    Value *Op = UI->getOperandValToReplace();
-    const Type *UseTy = Op->getType();
-    Instruction *User = UI->getUser();
-
-    // Compute the final addrec to expand into code.
-    const SCEV *AR = IU->getReplacementExpr(*UI);
-
-    // Evaluate the expression out of the loop, if possible.
-    if (!L->contains(UI->getUser())) {
-      const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
-      if (SE->isLoopInvariant(ExitVal, L))
-        AR = ExitVal;
-    }
-
-    // FIXME: It is an extremely bad idea to indvar substitute anything more
-    // complex than affine induction variables.  Doing so will put expensive
-    // polynomial evaluations inside of the loop, and the str reduction pass
-    // currently can only reduce affine polynomials.  For now just disable
-    // indvar subst on anything more complex than an affine addrec, unless
-    // it can be expanded to a trivial value.
-    if (!isSafe(AR, L, SE))
-      continue;
-
-    // Determine the insertion point for this user. By default, insert
-    // immediately before the user. The SCEVExpander class will automatically
-    // hoist loop invariants out of the loop. For PHI nodes, there may be
-    // multiple uses, so compute the nearest common dominator for the
-    // incoming blocks.
-    Instruction *InsertPt = User;
-    if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
-      for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
-        if (PHI->getIncomingValue(i) == Op) {
-          if (InsertPt == User)
-            InsertPt = PHI->getIncomingBlock(i)->getTerminator();
-          else
-            InsertPt =
-              DT->findNearestCommonDominator(InsertPt->getParent(),
-                                             PHI->getIncomingBlock(i))
-                    ->getTerminator();
-        }
-
-    // Now expand it into actual Instructions and patch it into place.
-    Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
-    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
-                 << "   into = " << *NewVal << "\n");
-
-    if (!isValidRewrite(Op, NewVal)) {
-      DeadInsts.push_back(NewVal);
-      continue;
-    }
-    // Inform ScalarEvolution that this value is changing. The change doesn't
-    // affect its value, but it does potentially affect which use lists the
-    // value will be on after the replacement, which affects ScalarEvolution's
-    // ability to walk use lists and drop dangling pointers when a value is
-    // deleted.
-    SE->forgetValue(User);
-
-    // Patch the new value into place.
-    if (Op->hasName())
-      NewVal->takeName(Op);
-    User->replaceUsesOfWith(Op, NewVal);
-    UI->setOperandValToReplace(NewVal);
-
-    ++NumRemoved;
-    Changed = true;
-
-    // The old value may be dead now.
-    DeadInsts.push_back(Op);
-  }
-}
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
-  BasicBlock *ExitBlock = L->getExitBlock();
-  if (!ExitBlock) return;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-  if (!Preheader) return;
-
-  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
-  BasicBlock::iterator I = Preheader->getTerminator();
-  while (I != Preheader->begin()) {
-    --I;
-    // New instructions were inserted at the end of the preheader.
-    if (isa<PHINode>(I))
-      break;
-
-    // Don't move instructions which might have side effects, since the side
-    // effects need to complete before instructions inside the loop.  Also don't
-    // move instructions which might read memory, since the loop may modify
-    // memory. Note that it's okay if the instruction might have undefined
-    // behavior: LoopSimplify guarantees that the preheader dominates the exit
-    // block.
-    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
-      continue;
-
-    // Skip debug info intrinsics.
-    if (isa<DbgInfoIntrinsic>(I))
-      continue;
-
-    // Don't sink static AllocaInsts out of the entry block, which would
-    // turn them into dynamic allocas!
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
-      if (AI->isStaticAlloca())
-        continue;
-
-    // Determine if there is a use in or before the loop (direct or
-    // otherwise).
-    bool UsedInLoop = false;
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-         UI != UE; ++UI) {
-      User *U = *UI;
-      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(U)) {
-        unsigned i =
-          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
-        UseBB = P->getIncomingBlock(i);
-      }
-      if (UseBB == Preheader || L->contains(UseBB)) {
-        UsedInLoop = true;
-        break;
-      }
-    }
-
-    // If there is, the def must remain in the preheader.
-    if (UsedInLoop)
-      continue;
-
-    // Otherwise, sink it to the exit block.
-    Instruction *ToMove = I;
-    bool Done = false;
-
-    if (I != Preheader->begin()) {
-      // Skip debug info intrinsics.
-      do {
-        --I;
-      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
-
-      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
-        Done = true;
-    } else {
-      Done = true;
-    }
-
-    ToMove->moveBefore(InsertPt);
-    if (Done) break;
-    InsertPt = ToMove;
-  }
-}
-
-/// ConvertToSInt - Convert APF to an integer, if possible.
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
-  bool isExact = false;
-  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
-    return false;
-  // See if we can convert this to an int64_t
-  uint64_t UIntVal;
-  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
-                           &isExact) != APFloat::opOK || !isExact)
-    return false;
-  IntVal = UIntVal;
-  return true;
-}
-
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-///   bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-///   bar((double)i);
-///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
-  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
-  unsigned BackEdge     = IncomingEdge^1;
-
-  // Check incoming value.
-  ConstantFP *InitValueVal =
-    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
-  int64_t InitValue;
-  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
-    return;
-
-  // Check IV increment. Reject this PN if increment operation is not
-  // an add or increment value can not be represented by an integer.
-  BinaryOperator *Incr =
-    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
-  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
-  // If this is not an add of the PHI with a constantfp, or if the constant fp
-  // is not an integer, bail out.
-  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
-  int64_t IncValue;
-  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
-      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
-    return;
-
-  // Check Incr uses. One user is PN and the other user is an exit condition
-  // used by the conditional terminator.
-  Value::use_iterator IncrUse = Incr->use_begin();
-  Instruction *U1 = cast<Instruction>(*IncrUse++);
-  if (IncrUse == Incr->use_end()) return;
-  Instruction *U2 = cast<Instruction>(*IncrUse++);
-  if (IncrUse != Incr->use_end()) return;
-
-  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
-  // only used by a branch, we can't transform it.
-  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
-  if (!Compare)
-    Compare = dyn_cast<FCmpInst>(U2);
-  if (Compare == 0 || !Compare->hasOneUse() ||
-      !isa<BranchInst>(Compare->use_back()))
-    return;
-
-  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
-
-  // We need to verify that the branch actually controls the iteration count
-  // of the loop.  If not, the new IV can overflow and no one will notice.
-  // The branch block must be in the loop and one of the successors must be out
-  // of the loop.
-  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
-  if (!L->contains(TheBr->getParent()) ||
-      (L->contains(TheBr->getSuccessor(0)) &&
-       L->contains(TheBr->getSuccessor(1))))
-    return;
-
-
-  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
-  // transform it.
-  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
-  int64_t ExitValue;
-  if (ExitValueVal == 0 ||
-      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
-    return;
-
-  // Find new predicate for integer comparison.
-  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
-  switch (Compare->getPredicate()) {
-  default: return;  // Unknown comparison.
-  case CmpInst::FCMP_OEQ:
-  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
-  case CmpInst::FCMP_ONE:
-  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
-  case CmpInst::FCMP_OGT:
-  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
-  case CmpInst::FCMP_OGE:
-  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
-  case CmpInst::FCMP_OLT:
-  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
-  case CmpInst::FCMP_OLE:
-  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
-  }
-
-  // We convert the floating point induction variable to a signed i32 value if
-  // we can.  This is only safe if the comparison will not overflow in a way
-  // that won't be trapped by the integer equivalent operations.  Check for this
-  // now.
-  // TODO: We could use i64 if it is native and the range requires it.
-
-  // The start/stride/exit values must all fit in signed i32.
-  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
-    return;
-
-  // If not actually striding (add x, 0.0), avoid touching the code.
-  if (IncValue == 0)
-    return;
-
-  // Positive and negative strides have different safety conditions.
-  if (IncValue > 0) {
-    // If we have a positive stride, we require the init to be less than the
-    // exit value and an equality or less than comparison.
-    if (InitValue >= ExitValue ||
-        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
-      return;
-
-    uint32_t Range = uint32_t(ExitValue-InitValue);
-    if (NewPred == CmpInst::ICMP_SLE) {
-      // Normalize SLE -> SLT, check for infinite loop.
-      if (++Range == 0) return;  // Range overflows.
-    }
-
-    unsigned Leftover = Range % uint32_t(IncValue);
-
-    // If this is an equality comparison, we require that the strided value
-    // exactly land on the exit value, otherwise the IV condition will wrap
-    // around and do things the fp IV wouldn't.
-    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
-        Leftover != 0)
-      return;
-
-    // If the stride would wrap around the i32 before exiting, we can't
-    // transform the IV.
-    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
-      return;
-
-  } else {
-    // If we have a negative stride, we require the init to be greater than the
-    // exit value and an equality or greater than comparison.
-    if (InitValue >= ExitValue ||
-        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
-      return;
-
-    uint32_t Range = uint32_t(InitValue-ExitValue);
-    if (NewPred == CmpInst::ICMP_SGE) {
-      // Normalize SGE -> SGT, check for infinite loop.
-      if (++Range == 0) return;  // Range overflows.
-    }
-
-    unsigned Leftover = Range % uint32_t(-IncValue);
-
-    // If this is an equality comparison, we require that the strided value
-    // exactly land on the exit value, otherwise the IV condition will wrap
-    // around and do things the fp IV wouldn't.
-    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
-        Leftover != 0)
-      return;
-
-    // If the stride would wrap around the i32 before exiting, we can't
-    // transform the IV.
-    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
-      return;
-  }
-
-  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
-
-  // Insert new integer induction variable.
-  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
-  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
-                      PN->getIncomingBlock(IncomingEdge));
-
-  Value *NewAdd =
-    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
-                              Incr->getName()+".int", Incr);
-  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
-
-  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
-                                      ConstantInt::get(Int32Ty, ExitValue),
-                                      Compare->getName());
-
-  // In the following deletions, PN may become dead and may be deleted.
-  // Use a WeakVH to observe whether this happens.
-  WeakVH WeakPH = PN;
-
-  // Delete the old floating point exit comparison.  The branch starts using the
-  // new comparison.
-  NewCompare->takeName(Compare);
-  Compare->replaceAllUsesWith(NewCompare);
-  RecursivelyDeleteTriviallyDeadInstructions(Compare);
-
-  // Delete the old floating point increment.
-  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
-  RecursivelyDeleteTriviallyDeadInstructions(Incr);
-
-  // If the FP induction variable still has uses, this is because something else
-  // in the loop uses its value.  In order to canonicalize the induction
-  // variable, we chose to eliminate the IV and rewrite it in terms of an
-  // int->fp cast.
-  //
-  // We give preference to sitofp over uitofp because it is faster on most
-  // platforms.
-  if (WeakPH) {
-    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
-                                 PN->getParent()->getFirstNonPHI());
-    PN->replaceAllUsesWith(Conv);
-    RecursivelyDeleteTriviallyDeadInstructions(PN);
-  }
-
-  // Add a new IVUsers entry for the newly-created integer PHI.
-  IU->AddUsersIfInteresting(NewPHI, NewPHI);
-}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index cf18ff040bda..b500d5b4fdff 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
   for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
     TestBB = BBTerm->getSuccessor(i);
     unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
-    if (NumPreds < MinNumPreds)
+    if (NumPreds < MinNumPreds) {
       MinSucc = i;
+      MinNumPreds = NumPreds;
+    }
   }
 
   return MinSucc;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 13bd02215be5..66add6ca01ee 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
 Pass *llvm::createLICMPass() { return new LICM(); }
 
 /// Hoist expressions out of the specified loop. Note, alias info for inner
-/// loop is not preserved so it is not a good idea to run LICM multiple 
+/// loop is not preserved so it is not a good idea to run LICM multiple
 /// times on one loop.
 ///
 bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
 
     // What if InnerLoop was modified by other passes ?
     CurAST->add(*InnerAST);
-    
+
     // Once we've incorporated the inner loop's AST into ours, we don't need the
     // subloop's anymore.
     delete InnerAST;
     LoopToAliasSetMap.erase(InnerL);
   }
-  
+
   CurLoop = L;
 
   // Get the preheader block to move instructions into...
@@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
          I != E; ++I)
       PromoteAliasSet(*I);
   }
-  
+
   // Clear out loops state information for the next iteration
   CurLoop = 0;
   Preheader = 0;
@@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
 
   for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
     Instruction &I = *--II;
-    
+
     // If the instruction is dead, we would try to sink it because it isn't used
     // in the loop, instead, just delete it.
     if (isInstructionTriviallyDead(&I)) {
@@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
         I.eraseFromParent();
         continue;
       }
-      
+
       // Try hoisting the instruction out to the preheader.  We can only do this
       // if all of the operands of the instruction are loop invariant and if it
       // is safe to hoist the instruction.
@@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
     // in the same alias set as something that ends up being modified.
     if (AA->pointsToConstantMemory(LI->getOperand(0)))
       return true;
-    
+
     // Don't hoist loads which have may-aliased stores in loop.
     uint64_t Size = 0;
     if (LI->getType()->isSized())
@@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) {
     }
     return;
   }
-  
+
   if (ExitBlocks.empty()) {
     // The instruction is actually dead if there ARE NO exit blocks.
     CurAST->deleteValue(&I);
@@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) {
     I.eraseFromParent();
     return;
   }
-  
+
   // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
   // hard work of inserting PHI nodes as necessary.
   SmallVector<PHINode*, 8> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
-  
+
   if (!I.use_empty())
     SSA.Initialize(I.getType(), I.getName());
-  
+
   // Insert a copy of the instruction in each exit block of the loop that is
   // dominated by the instruction.  Each exit block is known to only be in the
   // ExitBlocks list once.
   BasicBlock *InstOrigBB = I.getParent();
   unsigned NumInserted = 0;
-  
+
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBlock = ExitBlocks[i];
-    
+
     if (!DT->dominates(InstOrigBB, ExitBlock))
       continue;
-    
+
     // Insert the code after the last PHI node.
     BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-    
+
     // If this is the first exit block processed, just move the original
     // instruction, otherwise clone the original instruction and insert
     // the copy.
@@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) {
         New->setName(I.getName()+".le");
       ExitBlock->getInstList().insert(InsertPt, New);
     }
-    
+
     // Now that we have inserted the instruction, inform SSAUpdater.
     if (!I.use_empty())
       SSA.AddAvailableValue(ExitBlock, New);
   }
-  
+
   // If the instruction doesn't dominate any exit blocks, it must be dead.
   if (NumInserted == 0) {
     CurAST->deleteValue(&I);
@@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) {
     I.eraseFromParent();
     return;
   }
-  
+
   // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
   for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
     // Grab the use before incrementing the iterator.
@@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) {
     ++UI;
     SSA.RewriteUseAfterInsertions(U);
   }
-  
+
   // Update CurAST for NewPHIs if I had pointer type.
   if (I.getType()->isPointerTy())
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
       CurAST->copyValue(&I, NewPHIs[i]);
-  
+
   // Finally, remove the instruction from CurAST.  It is no longer in the loop.
   CurAST->deleteValue(&I);
 }
@@ -606,15 +606,17 @@ namespace {
     SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
     AliasSetTracker &AST;
     DebugLoc DL;
+    int Alignment;
   public:
     LoopPromoter(Value *SP,
                  const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                  SmallPtrSet<Value*, 4> &PMA,
                  SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
-                 DebugLoc dl)
-      : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
-        PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
-    
+                 DebugLoc dl, int alignment)
+      : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+        PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+        Alignment(alignment) {}
+
     virtual bool isInstInList(Instruction *I,
                               const SmallVectorImpl<Instruction*> &) const {
       Value *Ptr;
@@ -624,7 +626,7 @@ namespace {
         Ptr = cast<StoreInst>(I)->getPointerOperand();
       return PointerMustAliases.count(Ptr);
     }
-    
+
     virtual void doExtraRewritesBeforeFinalDeletion() const {
       // Insert stores after in the loop exit blocks.  Each exit block gets a
       // store of the live-out values that feed them.  Since we've already told
@@ -635,6 +637,7 @@ namespace {
         Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
         Instruction *InsertPos = ExitBlock->getFirstNonPHI();
         StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+        NewSI->setAlignment(Alignment);
         NewSI->setDebugLoc(DL);
       }
     }
@@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
       AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
     return;
-  
+
   assert(!AS.empty() &&
          "Must alias set should have at least one pointer element in it!");
   Value *SomePtr = AS.begin()->getValue();
@@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
   //
   // is not safe, because *P may only be valid to access if 'c' is true.
-  // 
+  //
   // It is safe to promote P if all uses are direct load/stores and if at
   // least one is guaranteed to be executed.
   bool GuaranteedToExecute = false;
-  
+
   SmallVector<Instruction*, 64> LoopUses;
   SmallPtrSet<Value*, 4> PointerMustAliases;
 
+  // We start with an alignment of one and try to find instructions that allow
+  // us to prove better alignment.
+  unsigned Alignment = 1;
+
   // Check that all of the pointers in the alias set have the same type.  We
   // cannot (yet) promote a memory location that is loaded and stored in
   // different sizes.
   for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
     Value *ASIV = ASI->getValue();
     PointerMustAliases.insert(ASIV);
-    
+
     // Check that all of the pointers in the alias set have the same type.  We
     // cannot (yet) promote a memory location that is loaded and stored in
     // different sizes.
     if (SomePtr->getType() != ASIV->getType())
       return;
-    
+
     for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
          UI != UE; ++UI) {
       // Ignore instructions that are outside the loop.
       Instruction *Use = dyn_cast<Instruction>(*UI);
       if (!Use || !CurLoop->contains(Use))
         continue;
-      
+
       // If there is an non-load/store instruction in the loop, we can't promote
       // it.
-      if (isa<LoadInst>(Use))
+      unsigned InstAlignment;
+      if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
         assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
-      else if (isa<StoreInst>(Use)) {
+        InstAlignment = load->getAlignment();
+      } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
         if (Use->getOperand(1) != ASIV)
           continue;
+        InstAlignment = store->getAlignment();
         assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
       } else
         return; // Not a load or store.
-      
+
+      // If the alignment of this instruction allows us to specify a more
+      // restrictive (and performant) alignment and if we are sure this
+      // instruction will be executed, update the alignment.
+      // Larger is better, with the exception of 0 being the best alignment.
+      if ((InstAlignment > Alignment || InstAlignment == 0)
+          && (Alignment != 0))
+        if (isSafeToExecuteUnconditionally(*Use)) {
+          GuaranteedToExecute = true;
+          Alignment = InstAlignment;
+        }
+
       if (!GuaranteedToExecute)
         GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-      
+
       LoopUses.push_back(Use);
     }
   }
-  
+
   // If there isn't a guaranteed-to-execute instruction, we can't promote.
   if (!GuaranteedToExecute)
     return;
-  
+
   // Otherwise, this is safe to promote, lets do it!
-  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');  
+  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
   Changed = true;
   ++NumPromoted;
 
@@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
 
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
-  
+
   // We use the SSAUpdater interface to insert phi nodes as required.
   SmallVector<PHINode*, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        *CurAST, DL);
-  
+                        *CurAST, DL, Alignment);
+
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad =
     new LoadInst(SomePtr, SomePtr->getName()+".promoted",
                  Preheader->getTerminator());
+  PreheaderLoad->setAlignment(Alignment);
   PreheaderLoad->setDebugLoc(DL);
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
 
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 753a558cfe83..f7f32981baa7 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -190,7 +190,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   BasicBlock* exitingBlock = exitingBlocks[0];
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
-    P->replaceUsesOfWith(exitingBlock, preheader);
+    int j = P->getBasicBlockIndex(exitingBlock);
+    assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+    P->setIncomingBlock(j, preheader);
     for (unsigned i = 1; i < exitingBlocks.size(); ++i)
       P->removeIncomingValue(exitingBlocks[i]);
     ++BI;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index dbf6eec331da..a0e41d9a9772 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -167,12 +167,17 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
 static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (isInstructionTriviallyDead(I))
-      deleteDeadInstruction(I, SE);    
+      deleteDeadInstruction(I, SE);
 }
 
 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
   CurLoop = L;
 
+  // Skip idiom recognition inside functions themselves named memset or memcpy.
+  StringRef Name = L->getHeader()->getParent()->getName();
+  if (Name == "memset" || Name == "memcpy")
+    return false;
+
   // The trip count of the loop must be analyzable.
   SE = &getAnalysis<ScalarEvolution>();
   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
@@ -467,8 +472,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
   // header.  This allows us to insert code for it in the preheader.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
   IRBuilder<> Builder(Preheader->getTerminator());
-  SCEVExpander Expander(*SE);
-  
+  SCEVExpander Expander(*SE, "loop-idiom");
+
   // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
   // this into a memset in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -488,7 +493,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
     deleteIfDeadInstruction(BasePtr, *SE);
     return false;
   }
-  
+
   // Okay, everything looks good, insert the memset.
 
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
@@ -556,8 +561,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
   // header.  This allows us to insert code for it in the preheader.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
   IRBuilder<> Builder(Preheader->getTerminator());
-  SCEVExpander Expander(*SE);
-  
+  SCEVExpander Expander(*SE, "loop-idiom");
+
   // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
   // this into a memcpy in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -568,7 +573,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
     Expander.expandCodeFor(StoreEv->getStart(),
                            Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                            Preheader->getTerminator());
-  
+
   if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
                             CurLoop, BECount, StoreSize,
                             getAnalysis<AliasAnalysis>(), SI)) {
@@ -593,9 +598,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
     deleteIfDeadInstruction(StoreBasePtr, *SE);
     return false;
   }
-  
+
   // Okay, everything is safe, we can transform this!
-  
+
 
   // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
   // pointer size if it isn't already.
@@ -619,7 +624,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
   DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
                << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
                << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-  
+
 
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 47dced37c3a4..9fd0958fd4c3 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -220,7 +220,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
   // For PHI nodes, the value available in OldPreHeader is just the
   // incoming value from OldPreHeader.
   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
 
   // For the rest of the instructions, either hoist to the OrigPreheader if
   // possible or create a clone in the OldPreHeader if not.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73ebd618a0cb..509d0264f10b 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1804,8 +1804,7 @@ LSRInstance::OptimizeLoopTermCond() {
         ExitingBlock->getInstList().insert(TermBr, Cond);
 
         // Clone the IVUse, as the old use still exists!
-        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(),
-                              CondUse->getPhi());
+        CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
         TermBr->replaceUsesOfWith(OldCond, Cond);
       }
     }
@@ -2768,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
         // value to the immediate would produce a value closer to zero than the
         // immediate itself, then the formula isn't worthwhile.
         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
-          if (C->getValue()->getValue().isNegative() !=
+          if (C->getValue()->isNegative() !=
                 (NewF.AM.BaseOffs < 0) &&
               (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
                 .ule(abs64(NewF.AM.BaseOffs)))
@@ -3699,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
   // we can remove them after we are done working.
   SmallVector<WeakVH, 16> DeadInsts;
 
-  SCEVExpander Rewriter(SE);
+  SCEVExpander Rewriter(SE, "lsr");
   Rewriter.disableCanonicalMode();
   Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
 
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index e05f29c3e13f..840c4b69cf06 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1021,6 +1021,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
           ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
         
+        // If Succ has any successors with PHI nodes, update them to have
+        // entries coming from Pred instead of Succ.
+        Succ->replaceAllUsesWith(Pred);
+        
         // Move all of the successor contents from Succ to Pred.
         Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
                                    Succ->end());
@@ -1028,10 +1032,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
         BI->eraseFromParent();
         RemoveFromWorklist(BI, Worklist);
         
-        // If Succ has any successors with PHI nodes, update them to have
-        // entries coming from Pred instead of Succ.
-        Succ->replaceAllUsesWith(Pred);
-        
         // Remove Succ from the loop tree.
         LI->removeBlock(Succ);
         LPM->deleteSimpleAnalysisValue(Succ, L);
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index be5aa2ea5832..7ed3db6cc1db 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -487,7 +487,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // happen to be using a load-store pair to implement it, rather than
   // a memcpy.
   if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
-    if (!LI->isVolatile() && LI->hasOneUse()) {
+    if (!LI->isVolatile() && LI->hasOneUse() &&
+        LI->getParent() == SI->getParent()) {
       MemDepResult ldep = MD->getDependency(LI);
       CallInst *C = 0;
       if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -496,17 +497,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       if (C) {
         // Check that nothing touches the dest of the "copy" between
         // the call and the store.
-        MemDepResult sdep = MD->getDependency(SI);
-        if (!sdep.isNonLocal()) {
-          bool FoundCall = false;
-          for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
-            if (&*I == C) {
-              FoundCall = true;
-              break;
-            }
-          }
-          if (!FoundCall)
+        AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+        AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+        for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+                                  E = C; I != E; --I) {
+          if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
             C = 0;
+            break;
+          }
         }
       }
 
@@ -842,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   
   // If not, then we know we can transform this.
   Module *Mod = M->getParent()->getParent()->getParent();
-  const Type *ArgTys[3] = { M->getRawDest()->getType(),
-                            M->getRawSource()->getType(),
-                            M->getLength()->getType() };
+  Type *ArgTys[3] = { M->getRawDest()->getType(),
+                      M->getRawSource()->getType(),
+                      M->getLength()->getType() };
   M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
-                                                 ArgTys, 3));
+                                                 ArgTys));
 
   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 000000000000..ee132d3be4f5
--- /dev/null
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc. Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// MapVector - An associative container with fast insertion-order
+  /// (deterministic) iteration over its elements. Plus the special
+  /// blot operation.
+  template<class KeyT, class ValueT>
+  class MapVector {
+    /// Map - Map keys to indices in Vector.
+    typedef DenseMap<KeyT, size_t> MapTy;
+    MapTy Map;
+
+    /// Vector - Keys and values.
+    typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+    VectorTy Vector;
+
+  public:
+    typedef typename VectorTy::iterator iterator;
+    typedef typename VectorTy::const_iterator const_iterator;
+    iterator begin() { return Vector.begin(); }
+    iterator end() { return Vector.end(); }
+    const_iterator begin() const { return Vector.begin(); }
+    const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+    ~MapVector() {
+      assert(Vector.size() >= Map.size()); // May differ due to blotting.
+      for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+           I != E; ++I) {
+        assert(I->second < Vector.size());
+        assert(Vector[I->second].first == I->first);
+      }
+      for (typename VectorTy::const_iterator I = Vector.begin(),
+           E = Vector.end(); I != E; ++I)
+        assert(!I->first ||
+               (Map.count(I->first) &&
+                Map[I->first] == size_t(I - Vector.begin())));
+    }
+#endif
+
+    ValueT &operator[](KeyT Arg) {
+      std::pair<typename MapTy::iterator, bool> Pair =
+        Map.insert(std::make_pair(Arg, size_t(0)));
+      if (Pair.second) {
+        Pair.first->second = Vector.size();
+        Vector.push_back(std::make_pair(Arg, ValueT()));
+        return Vector.back().second;
+      }
+      return Vector[Pair.first->second].second;
+    }
+
+    std::pair<iterator, bool>
+    insert(const std::pair<KeyT, ValueT> &InsertPair) {
+      std::pair<typename MapTy::iterator, bool> Pair =
+        Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+      if (Pair.second) {
+        Pair.first->second = Vector.size();
+        Vector.push_back(InsertPair);
+        return std::make_pair(llvm::prior(Vector.end()), true);
+      }
+      return std::make_pair(Vector.begin() + Pair.first->second, false);
+    }
+
+    const_iterator find(KeyT Key) const {
+      typename MapTy::const_iterator It = Map.find(Key);
+      if (It == Map.end()) return Vector.end();
+      return Vector.begin() + It->second;
+    }
+
+    /// blot - This is similar to erase, but instead of removing the element
+    /// from the vector, it just zeros out the key in the vector. This leaves
+    /// iterators intact, but clients must be prepared for zeroed-out keys when
+    /// iterating.
+    void blot(KeyT Key) {
+      typename MapTy::iterator It = Map.find(Key);
+      if (It == Map.end()) return;
+      Vector[It->second].first = KeyT();
+      Map.erase(It);
+    }
+
+    void clear() {
+      Map.clear();
+      Vector.clear();
+    }
+  };
+}
+
+//===----------------------------------------------------------------------===//
+// ARC Utilities.
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// InstructionClass - A simple classification for instructions.
+  enum InstructionClass {
+    IC_Retain,              ///< objc_retain
+    IC_RetainRV,            ///< objc_retainAutoreleasedReturnValue
+    IC_RetainBlock,         ///< objc_retainBlock
+    IC_Release,             ///< objc_release
+    IC_Autorelease,         ///< objc_autorelease
+    IC_AutoreleaseRV,       ///< objc_autoreleaseReturnValue
+    IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+    IC_AutoreleasepoolPop,  ///< objc_autoreleasePoolPop
+    IC_NoopCast,            ///< objc_retainedObject, etc.
+    IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+    IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+    IC_LoadWeakRetained,    ///< objc_loadWeakRetained (primitive)
+    IC_StoreWeak,           ///< objc_storeWeak (primitive)
+    IC_InitWeak,            ///< objc_initWeak (derived)
+    IC_LoadWeak,            ///< objc_loadWeak (derived)
+    IC_MoveWeak,            ///< objc_moveWeak (derived)
+    IC_CopyWeak,            ///< objc_copyWeak (derived)
+    IC_DestroyWeak,         ///< objc_destroyWeak (derived)
+    IC_CallOrUser,          ///< could call objc_release and/or "use" pointers
+    IC_Call,                ///< could call objc_release
+    IC_User,                ///< could "use" a pointer
+    IC_None                 ///< anything else
+  };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+  // Pointers to static or stack storage are not reference-counted pointers.
+  if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+    return false;
+  // Special arguments are not reference-counted.
+  if (const Argument *Arg = dyn_cast<Argument>(Op))
+    if (Arg->hasByValAttr() ||
+        Arg->hasNestAttr() ||
+        Arg->hasStructRetAttr())
+      return false;
+  // Only consider values with pointer types, and not function pointers.
+  const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+  if (!Ty || isa<FunctionType>(Ty->getElementType()))
+    return false;
+  // Conservatively assume anything else is a potential use.
+  return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+  for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I)
+    if (IsPotentialUse(*I))
+      return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+  return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+  Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+  // No arguments.
+  if (AI == AE)
+    return StringSwitch<InstructionClass>(F->getName())
+      .Case("objc_autoreleasePoolPush",  IC_AutoreleasepoolPush)
+      .Default(IC_CallOrUser);
+
+  // One argument.
+  const Argument *A0 = AI++;
+  if (AI == AE)
+    // Argument is a pointer.
+    if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+      const Type *ETy = PTy->getElementType();
+      // Argument is i8*.
+      if (ETy->isIntegerTy(8))
+        return StringSwitch<InstructionClass>(F->getName())
+          .Case("objc_retain",                IC_Retain)
+          .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+          .Case("objc_retainBlock",           IC_RetainBlock)
+          .Case("objc_release",               IC_Release)
+          .Case("objc_autorelease",           IC_Autorelease)
+          .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+          .Case("objc_autoreleasePoolPop",    IC_AutoreleasepoolPop)
+          .Case("objc_retainedObject",        IC_NoopCast)
+          .Case("objc_unretainedObject",      IC_NoopCast)
+          .Case("objc_unretainedPointer",     IC_NoopCast)
+          .Case("objc_retain_autorelease",    IC_FusedRetainAutorelease)
+          .Case("objc_retainAutorelease",     IC_FusedRetainAutorelease)
+          .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+          .Default(IC_CallOrUser);
+
+      // Argument is i8**
+      if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+        if (Pte->getElementType()->isIntegerTy(8))
+          return StringSwitch<InstructionClass>(F->getName())
+            .Case("objc_loadWeakRetained",      IC_LoadWeakRetained)
+            .Case("objc_loadWeak",              IC_LoadWeak)
+            .Case("objc_destroyWeak",           IC_DestroyWeak)
+            .Default(IC_CallOrUser);
+    }
+
+  // Two arguments, first is i8**.
+  const Argument *A1 = AI++;
+  if (AI == AE)
+    if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+      if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+        if (Pte->getElementType()->isIntegerTy(8))
+          if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+            const Type *ETy1 = PTy1->getElementType();
+            // Second argument is i8*
+            if (ETy1->isIntegerTy(8))
+              return StringSwitch<InstructionClass>(F->getName())
+                     .Case("objc_storeWeak",             IC_StoreWeak)
+                     .Case("objc_initWeak",              IC_InitWeak)
+                     .Default(IC_CallOrUser);
+            // Second argument is i8**.
+            if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+              if (Pte1->getElementType()->isIntegerTy(8))
+                return StringSwitch<InstructionClass>(F->getName())
+                       .Case("objc_moveWeak",              IC_MoveWeak)
+                       .Case("objc_copyWeak",              IC_CopyWeak)
+                       .Default(IC_CallOrUser);
+          }
+
+  // Anything else.
+  return IC_CallOrUser;
+}
+
+/// GetInstructionClass - Determine what kind of construct V is.
+static InstructionClass GetInstructionClass(const Value *V) {
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Any instruction other than bitcast and gep with a pointer operand has a
+    // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+    // to a subsequent use, rather than using it themselves, in this sense.
+    // As a short cut, several other opcodes are known to have no pointer
+    // operands of interest. And ret is never followed by a release, so it's
+    // not interesting to examine.
+    switch (I->getOpcode()) {
+    case Instruction::Call: {
+      const CallInst *CI = cast<CallInst>(I);
+      // Check for calls to special functions.
+      if (const Function *F = CI->getCalledFunction()) {
+        InstructionClass Class = GetFunctionClass(F);
+        if (Class != IC_CallOrUser)
+          return Class;
+
+        // None of the intrinsic functions do objc_release. For intrinsics, the
+        // only question is whether or not they may be users.
+        switch (F->getIntrinsicID()) {
+        case 0: break;
+        case Intrinsic::bswap: case Intrinsic::ctpop:
+        case Intrinsic::ctlz: case Intrinsic::cttz:
+        case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+        case Intrinsic::stacksave: case Intrinsic::stackrestore:
+        case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+        // Don't let dbg info affect our results.
+        case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+          // Short cut: Some intrinsics obviously don't use ObjC pointers.
+          return IC_None;
+        default:
+          for (Function::const_arg_iterator AI = F->arg_begin(),
+               AE = F->arg_end(); AI != AE; ++AI)
+            if (IsPotentialUse(AI))
+              return IC_User;
+          return IC_None;
+        }
+      }
+      return GetCallSiteClass(CI);
+    }
+    case Instruction::Invoke:
+      return GetCallSiteClass(cast<InvokeInst>(I));
+    case Instruction::BitCast:
+    case Instruction::GetElementPtr:
+    case Instruction::Select: case Instruction::PHI:
+    case Instruction::Ret: case Instruction::Br:
+    case Instruction::Switch: case Instruction::IndirectBr:
+    case Instruction::Alloca: case Instruction::VAArg:
+    case Instruction::Add: case Instruction::FAdd:
+    case Instruction::Sub: case Instruction::FSub:
+    case Instruction::Mul: case Instruction::FMul:
+    case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+    case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+    case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+    case Instruction::And: case Instruction::Or: case Instruction::Xor:
+    case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+    case Instruction::IntToPtr: case Instruction::FCmp:
+    case Instruction::FPTrunc: case Instruction::FPExt:
+    case Instruction::FPToUI: case Instruction::FPToSI:
+    case Instruction::UIToFP: case Instruction::SIToFP:
+    case Instruction::InsertElement: case Instruction::ExtractElement:
+    case Instruction::ShuffleVector:
+    case Instruction::ExtractValue:
+      break;
+    case Instruction::ICmp:
+      // Comparing a pointer with null, or any other constant, isn't an
+      // interesting use, because we don't care what the pointer points to, or
+      // about the values of any other dynamic reference-counted pointers.
+      if (IsPotentialUse(I->getOperand(1)))
+        return IC_User;
+      break;
+    default:
+      // For anything else, check all the operands.
+      for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+           OI != OE; ++OI)
+        if (IsPotentialUse(*OI))
+          return IC_User;
+    }
+  }
+
+  // Otherwise, it's totally inert for ARC purposes.
+  return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (const Function *F = CI->getCalledFunction())
+      return GetFunctionClass(F);
+    // Otherwise, be conservative.
+    return IC_CallOrUser;
+  }
+
+  // Otherwise, be conservative.
+  return IC_User;
+}
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+  return Class == IC_Retain ||
+         Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+  return Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+  // objc_retainBlock technically doesn't always return its argument
+  // verbatim, but it doesn't matter for our purposes here.
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_RetainBlock ||
+         Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_Release ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+  // IC_RetainBlock may be given a stack argument.
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+  return Class == IC_Retain ||
+         Class == IC_RetainRV ||
+         Class == IC_RetainBlock ||
+         Class == IC_Release ||
+         Class == IC_Autorelease ||
+         Class == IC_AutoreleaseRV ||
+         Class == IC_AutoreleasepoolPush ||
+         Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// argument verbatim, so if it's such a call and the return value has users,
+/// replace them with the argument value.
+static void EraseInstruction(Instruction *CI) {
+  Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+  bool Unused = CI->use_empty();
+
+  if (!Unused) {
+    // Replace the return value with the argument.
+    assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+           "Can't delete non-forwarding instruction with users!");
+    CI->replaceAllUsesWith(OldArg);
+  }
+
+  CI->eraseFromParent();
+
+  if (Unused)
+    RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which
+/// also knows how to look through objc_retain and objc_autorelease calls, which
+/// we know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+  for (;;) {
+    V = GetUnderlyingObject(V);
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+
+  return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+  for (;;) {
+    V = V->stripPointerCasts();
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+  return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+  for (;;) {
+    V = V->stripPointerCasts();
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+  return V;
+}
+
+/// GetObjCArg - Assuming the given instruction is one of the special calls such
+/// as objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static Value *GetObjCArg(Value *Inst) {
+  return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+/// IsObjCIdentifiedObject - This is similar to AliasAnalysis'
+/// isIdentifiedObject, except that it uses special knowledge of
+/// ObjC conventions...
+static bool IsObjCIdentifiedObject(const Value *V) {
+  // Assume that call results and arguments have their own "provenance".
+  // Constants (including GlobalVariables) and Allocas are never
+  // reference-counted.
+  if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+      isa<Argument>(V) || isa<Constant>(V) ||
+      isa<AllocaInst>(V))
+    return true;
+
+  if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+    const Value *Pointer =
+      StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+      StringRef Name = GV->getName();
+      // These special variables are known to hold values which are not
+      // reference-counted pointers.
+      if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+          Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+          Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+          Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+          Name.startswith("\01l_objc_msgSend_fixup_"))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// FindSingleUseIdentifiedObject - This is similar to
+/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
+/// with multiple uses.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+  if (Arg->hasOneUse()) {
+    if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+      return FindSingleUseIdentifiedObject(BC->getOperand(0));
+    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+      if (GEP->hasAllZeroIndices())
+        return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+    if (IsForwarding(GetBasicInstructionClass(Arg)))
+      return FindSingleUseIdentifiedObject(
+               cast<CallInst>(Arg)->getArgOperand(0));
+    if (!IsObjCIdentifiedObject(Arg))
+      return 0;
+    return Arg;
+  }
+
+  // If we found an identifiable object but it has multiple uses, but they
+  // are trivial uses, we can still consider this to be a single-use
+  // value.
+  if (IsObjCIdentifiedObject(Arg)) {
+    for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+         UI != UE; ++UI) {
+      const User *U = *UI;
+      if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+         return 0;
+    }
+
+    return Arg;
+  }
+
+  return 0;
+}
+
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on.
+static bool ModuleHasARC(const Module &M) {
+  return
+    M.getNamedValue("objc_retain") ||
+    M.getNamedValue("objc_release") ||
+    M.getNamedValue("objc_autorelease") ||
+    M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+    M.getNamedValue("objc_retainBlock") ||
+    M.getNamedValue("objc_autoreleaseReturnValue") ||
+    M.getNamedValue("objc_autoreleasePoolPush") ||
+    M.getNamedValue("objc_loadWeakRetained") ||
+    M.getNamedValue("objc_loadWeak") ||
+    M.getNamedValue("objc_destroyWeak") ||
+    M.getNamedValue("objc_storeWeak") ||
+    M.getNamedValue("objc_initWeak") ||
+    M.getNamedValue("objc_moveWeak") ||
+    M.getNamedValue("objc_copyWeak") ||
+    M.getNamedValue("objc_retainedObject") ||
+    M.getNamedValue("objc_unretainedObject") ||
+    M.getNamedValue("objc_unretainedPointer");
+}
+
+//===----------------------------------------------------------------------===//
+// ARC AliasAnalysis.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+
+namespace {
+  /// ObjCARCAliasAnalysis - This is a simple alias analysis
+  /// implementation that uses knowledge of ARC constructs to answer queries.
+  ///
+  /// TODO: This class could be generalized to know about other ObjC-specific
+  /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing
+  /// even though their offsets are dynamic.
+  class ObjCARCAliasAnalysis : public ImmutablePass,
+                               public AliasAnalysis {
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+      initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+
+  private:
+    virtual void initializePass() {
+      InitializeAliasAnalysis(this);
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(const void *PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual AliasResult alias(const Location &LocA, const Location &LocB);
+    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+    virtual ModRefBehavior getModRefBehavior(const Function *F);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                       const Location &Loc);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                       ImmutableCallSite CS2);
+  };
+}  // End of anonymous namespace
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+                   "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+  return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::alias(LocA, LocB);
+
+  // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+  // precise alias query.
+  const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+  const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+  AliasResult Result =
+    AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+                         Location(SB, LocB.Size, LocB.TBAATag));
+  if (Result != MayAlias)
+    return Result;
+
+  // If that failed, climb to the underlying object, including climbing through
+  // ObjC-specific no-ops, and try making an imprecise alias query.
+  const Value *UA = GetUnderlyingObjCPtr(SA);
+  const Value *UB = GetUnderlyingObjCPtr(SB);
+  if (UA != SA || UB != SB) {
+    Result = AliasAnalysis::alias(Location(UA), Location(UB));
+    // We can't use MustAlias or PartialAlias results here because
+    // GetUnderlyingObjCPtr may return an offsetted pointer value.
+    if (Result == NoAlias)
+      return NoAlias;
+  }
+
+  // If that failed, fail. We don't need to chain here, since that's covered
+  // by the earlier precise query.
+  return MayAlias;
+}
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+                                             bool OrLocal) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+  // First, strip off no-ops, including ObjC-specific no-ops, and try making
+  // a precise constant-memory query.
+  const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+  if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+                                            OrLocal))
+    return true;
+
+  // If that failed, climb to the underlying object, including climbing through
+  // ObjC-specific no-ops, and try making an imprecise constant-memory query.
+  const Value *U = GetUnderlyingObjCPtr(S);
+  if (U != S)
+    return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+  // If that failed, fail. We don't need to chain here, since that's covered
+  // by the earlier precise query.
+  return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+  // We have nothing to do. Just chain to the next AliasAnalysis.
+  return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::getModRefBehavior(F);
+
+  switch (GetFunctionClass(F)) {
+  case IC_NoopCast:
+    return DoesNotAccessMemory;
+  default:
+    break;
+  }
+
+  return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::getModRefInfo(CS, Loc);
+
+  switch (GetBasicInstructionClass(CS.getInstruction())) {
+  case IC_Retain:
+  case IC_RetainRV:
+  case IC_RetainBlock:
+  case IC_Autorelease:
+  case IC_AutoreleaseRV:
+  case IC_NoopCast:
+  case IC_AutoreleasepoolPush:
+  case IC_FusedRetainAutorelease:
+  case IC_FusedRetainAutoreleaseRV:
+    // These functions don't access any memory visible to the compiler.
+    return NoModRef;
+  default:
+    break;
+  }
+
+  return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+                                    ImmutableCallSite CS2) {
+  // TODO: Theoretically we could check for dependencies between objc_* calls
+  // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+  return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
+//===----------------------------------------------------------------------===//
+// ARC expansion.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Scalar.h"
+
+namespace {
+  /// ObjCARCExpand - Early ARC transformations.
+  class ObjCARCExpand : public FunctionPass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
+  public:
+    static char ID;
+    ObjCARCExpand() : FunctionPass(ID) {
+      initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+                "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+  return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  return false;
+}
+
+/// runOnFunction - For every retain/autorelease-style runtime call in F that
+/// returns its argument, redirect all uses of the call's result to the
+/// argument itself. Returns true if any such call was seen.
+bool ObjCARCExpand::runOnFunction(Function &F) {
+  // Bail out when ARC optimization is disabled, or when nothing in the
+  // Module uses ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  bool MadeChange = false;
+
+  for (inst_iterator It = inst_begin(&F), End = inst_end(&F); It != End;
+       ++It) {
+    Instruction *Cur = &*It;
+
+    // Classify the instruction: does it hand back its first argument?
+    bool ForwardsArgument;
+    switch (GetBasicInstructionClass(Cur)) {
+    case IC_Retain:
+    case IC_RetainRV:
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV:
+      ForwardsArgument = true;
+      break;
+    default:
+      ForwardsArgument = false;
+      break;
+    }
+    if (!ForwardsArgument)
+      continue;
+
+    // These calls return their argument verbatim, as a low-level
+    // optimization. That makes high-level optimizations harder, so undo any
+    // use of it that the front-end emitted; a later pass redoes it.
+    Value *Forwarded = cast<CallInst>(Cur)->getArgOperand(0);
+    Cur->replaceAllUsesWith(Forwarded);
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+/// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+// Counters reported by the ARC optimizer; each STATISTIC pairs a counter
+// variable with its human-readable description.
+STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
+STATISTIC(NumRets,        "Number of return value forwarding "
+                          "retain+autoreleases eliminated");
+STATISTIC(NumRRs,         "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
+
+namespace {
+  /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+  /// uses many of the same techniques, except it uses special ObjC-specific
+  /// reasoning about pointer relationships.
+  ///
+  /// Query results are memoized per unordered pointer pair in CachedResults;
+  /// call clear() to drop them.
+  class ProvenanceAnalysis {
+    /// AA - The underlying alias analysis, consulted first by relatedCheck.
+    /// Null until setAA is called.
+    AliasAnalysis *AA;
+
+    typedef std::pair<const Value *, const Value *> ValuePairTy;
+    typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+
+    /// CachedResults - Memoized answers of related(), keyed by pointer pair.
+    CachedResultsTy CachedResults;
+
+    // Uncached workers behind related().
+    bool relatedCheck(const Value *A, const Value *B);
+    bool relatedSelect(const SelectInst *A, const Value *B);
+    bool relatedPHI(const PHINode *A, const Value *B);
+
+    // Do not implement.
+    void operator=(const ProvenanceAnalysis &);
+    ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+  public:
+    /// Start with no alias analysis attached, so that a missing setAA call
+    /// shows up as a null pointer instead of an indeterminate one.
+    ProvenanceAnalysis() : AA(0) {}
+
+    void setAA(AliasAnalysis *aa) { AA = aa; }
+
+    AliasAnalysis *getAA() const { return AA; }
+
+    /// related - Test whether A and B may be related; true is the
+    /// conservative answer.
+    bool related(const Value *A, const Value *B);
+
+    /// clear - Forget all memoized query results.
+    void clear() {
+      CachedResults.clear();
+    }
+  };
+}
+
+/// relatedSelect - Decide whether the select A may be related to B by
+/// examining A's two arms. Returns true as soon as any arm is related.
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+  const Value *TrueArm = A->getTrueValue();
+  const Value *FalseArm = A->getFalseValue();
+
+  // When B is a select on the very same condition, the arms are taken in
+  // lockstep, so it suffices to compare corresponding arms with each other.
+  const SelectInst *SB = dyn_cast<SelectInst>(B);
+  if (SB && A->getCondition() == SB->getCondition())
+    return related(TrueArm, SB->getTrueValue()) ||
+           related(FalseArm, SB->getFalseValue());
+
+  // Otherwise B has to be checked against each arm on its own; if neither
+  // arm is related, the select is not related either.
+  return related(TrueArm, B) || related(FalseArm, B);
+}
+
+/// relatedPHI - Decide whether the PHI node A may be related to B by
+/// examining A's incoming values. Returns true as soon as one is related.
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+  const unsigned NumIncoming = A->getNumIncomingValues();
+
+  // Two PHIs in the same block select their inputs along the same edge, so a
+  // more precise (and cheaper) edge-by-edge comparison is enough.
+  const PHINode *PNB = dyn_cast<PHINode>(B);
+  if (PNB && PNB->getParent() == A->getParent()) {
+    for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+      const Value *Ours = A->getIncomingValue(Idx);
+      const Value *Theirs =
+        PNB->getIncomingValueForBlock(A->getIncomingBlock(Idx));
+      if (related(Ours, Theirs))
+        return true;
+    }
+    return false;
+  }
+
+  // General case: compare B with every distinct incoming value of A.
+  SmallPtrSet<const Value *, 4> SeenSources;
+  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+    const Value *Source = A->getIncomingValue(Idx);
+    if (!SeenSources.insert(Source))
+      continue; // Already checked this value.
+    if (related(Source, B))
+      return true;
+  }
+
+  // None of the incoming values is related to B.
+  return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
+/// The users of P are walked transitively with a worklist.
+static bool isStoredObjCPointer(const Value *P) {
+  SmallVector<const Value *, 8> Pending;
+  SmallPtrSet<const Value *, 8> Seen;
+  Seen.insert(P);
+  Pending.push_back(P);
+
+  while (!Pending.empty()) {
+    const Value *Cur = Pending.pop_back_val();
+
+    for (Value::const_use_iterator UI = Cur->use_begin(), UE = Cur->use_end();
+         UI != UE; ++UI) {
+      const User *Ur = *UI;
+
+      if (isa<StoreInst>(Ur)) {
+        // Operand 0 of a store is the stored value: the pointer escapes.
+        if (UI.getOperandNo() == 0)
+          return true;
+        // Otherwise the pointer is only stored through; keep looking.
+        continue;
+      }
+
+      // Being passed as a call argument is ignored here.
+      if (isa<CallInst>(Ur))
+        continue;
+
+      // Assume the worst once the value being followed is a ptrtoint. Note
+      // that this tests the value whose uses are being walked, not the user.
+      if (isa<PtrToIntInst>(Cur))
+        return true;
+
+      // Follow this user's own uses, once.
+      if (Seen.insert(Ur))
+        Pending.push_back(Ur);
+    }
+  }
+
+  // No store of the pointer was found.
+  return false;
+}
+
+/// relatedCheck - Uncached implementation of related(): returns false only
+/// when A and B can be shown to be unrelated, and true otherwise.
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+  // Look through provenance pass-throughs on both sides.
+  A = GetUnderlyingObjCPtr(A);
+  B = GetUnderlyingObjCPtr(B);
+
+  // Identical underlying objects are trivially related.
+  if (A == B)
+    return true;
+
+  // Regular AliasAnalysis gives a first approximation; only a MayAlias
+  // answer needs the ObjC-specific reasoning below.
+  switch (AA->alias(A, B)) {
+  case AliasAnalysis::NoAlias:
+    return false;
+  case AliasAnalysis::MustAlias:
+  case AliasAnalysis::PartialAlias:
+    return true;
+  case AliasAnalysis::MayAlias:
+    break;
+  }
+
+  const bool AIdentified = IsObjCIdentifiedObject(A);
+  const bool BIdentified = IsObjCIdentifiedObject(B);
+
+  if (AIdentified && BIdentified) {
+    // Both pointers have provenance, so they can be compared directly.
+    if (A != B)
+      return false;
+  } else if (AIdentified) {
+    // An ObjC-identified object can't alias a load if it is never locally
+    // stored.
+    if (isa<LoadInst>(B))
+      return isStoredObjCPointer(A);
+  } else if (BIdentified) {
+    // Same reasoning with the roles of A and B exchanged.
+    if (isa<LoadInst>(A))
+      return isStoredObjCPointer(B);
+  }
+
+  // PHI nodes and selects get dedicated handling, tried on A first and then
+  // on B for each kind.
+  if (const PHINode *PhiA = dyn_cast<PHINode>(A))
+    return relatedPHI(PhiA, B);
+  if (const PHINode *PhiB = dyn_cast<PHINode>(B))
+    return relatedPHI(PhiB, A);
+  if (const SelectInst *SelA = dyn_cast<SelectInst>(A))
+    return relatedSelect(SelA, B);
+  if (const SelectInst *SelB = dyn_cast<SelectInst>(B))
+    return relatedSelect(SelB, A);
+
+  // Nothing proved the pointers unrelated; be conservative.
+  return true;
+}
+
+/// related - Memoizing front end for relatedCheck. The pair is put into a
+/// canonical order first so that (A, B) and (B, A) share one cache entry.
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+  if (A > B) std::swap(A, B);
+  const ValuePairTy Key(A, B);
+
+  // Seed the cache with the conservative answer. A failed insertion means
+  // the answer (or the placeholder of a query still in progress further up
+  // the call stack) is already there, which guards against recursion.
+  std::pair<CachedResultsTy::iterator, bool> Inserted =
+    CachedResults.insert(std::make_pair(Key, true));
+  if (!Inserted.second)
+    return Inserted.first->second;
+
+  // Compute the real answer and look the slot up again by key rather than
+  // reusing the iterator obtained before the nested queries ran.
+  const bool Answer = relatedCheck(A, B);
+  CachedResults[Key] = Answer;
+  return Answer;
+}
+
+namespace {
+  // Sequence - A sequence of states that a pointer may go through in which an
+  // objc_retain and objc_release are actually needed.
+  //
+  // The declaration order matters: MergeSeqs compares enumerators with '>' to
+  // bring a pair of states into a canonical order.
+  enum Sequence {
+    S_None,           ///< no sequence in progress
+    S_Retain,         ///< objc_retain(x)
+    S_CanRelease,     ///< foo(x) -- x could possibly see a ref count decrement
+    S_Use,            ///< any use of x
+    S_Stop,           ///< like S_Release, but code motion is stopped
+    S_Release,        ///< objc_release(x)
+    S_MovableRelease  ///< objc_release(x), !clang.imprecise_release
+  };
+}
+
+/// MergeSeqs - Combine the sequence states reaching a merge point from two
+/// paths. TopDown selects the traversal direction. The result is S_None
+/// whenever the two states cannot be reconciled.
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+  // Equal states merge to themselves; S_None absorbs everything.
+  if (A == B)
+    return A;
+  if (A == S_None || B == S_None)
+    return S_None;
+
+  // Order the two (distinct) states by their position in the enum.
+  const Sequence Earlier = A > B ? B : A;
+  const Sequence Later = A > B ? A : B;
+
+  if (TopDown) {
+    // Going down, a retain merged with a later mid-sequence state yields the
+    // state which is further along. S_CanRelease and S_Use never merge.
+    const bool LaterIsMidSequence = Later == S_CanRelease || Later == S_Use;
+    return (Earlier == S_Retain && LaterIsMidSequence) ? Later : S_None;
+  }
+
+  switch (Earlier) {
+  case S_CanRelease:
+  case S_Use:
+    // Going up, a use or possible decrement is further along than any kind
+    // of release.
+    if (Later == S_Release || Later == S_Stop || Later == S_MovableRelease)
+      return Earlier;
+    break;
+  case S_Stop:
+    // Both sides are releases: keep the more conservative one.
+    if (Later == S_Release || Later == S_MovableRelease)
+      return Earlier;
+    break;
+  case S_Release:
+    if (Later == S_MovableRelease)
+      return Earlier;
+    break;
+  default:
+    break;
+  }
+
+  return S_None;
+}
+
+namespace {
+  /// RRInfo - Unidirectional information about either a
+  /// retain-decrement-use-release sequence or release-use-decrement-retain
+  /// reverse sequence.
+  struct RRInfo {
+    /// KnownIncremented - After an objc_retain, the reference count of the
+    /// referenced object is known to be positive. Similarly, before an
+    /// objc_release, the reference count of the referenced object is known to
+    /// be positive. If there are retain-release pairs in code regions where the
+    /// retain count is known to be positive, they can be eliminated, regardless
+    /// of any side effects between them.
+    bool KnownIncremented;
+
+    /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+    /// opposed to objc_retain calls).
+    bool IsRetainBlock;
+
+    /// IsTailCallRelease - True if the objc_release calls are all marked
+    /// with the "tail" keyword.
+    bool IsTailCallRelease;
+
+    /// ReleaseMetadata - If the Calls are objc_release calls and they all have
+    /// a clang.imprecise_release tag, this is the metadata tag. Null
+    /// otherwise.
+    MDNode *ReleaseMetadata;
+
+    /// Calls - For a top-down sequence, the set of objc_retains or
+    /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+    SmallPtrSet<Instruction *, 2> Calls;
+
+    /// ReverseInsertPts - The set of optimal insert positions for
+    /// moving calls in the opposite sequence.
+    SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+    /// Start out with every flag false, no metadata and empty sets.
+    RRInfo() :
+      KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+      ReleaseMetadata(0) {}
+
+    /// clear - Return the object to its freshly constructed state.
+    void clear();
+  };
+}
+
+/// clear - Reset every member to the value the default constructor gives it.
+void RRInfo::clear() {
+  // Drop the recorded instructions first...
+  ReverseInsertPts.clear();
+  Calls.clear();
+  // ...then forget everything that was known about them.
+  ReleaseMetadata = 0;
+  IsTailCallRelease = false;
+  IsRetainBlock = false;
+  KnownIncremented = false;
+}
+
+namespace {
+  /// PtrState - This class summarizes several per-pointer runtime properties
+  /// which are propagated through the flow graph: a lower bound on the number
+  /// of reference count increments, and the position in the current
+  /// retain/release Sequence.
+  class PtrState {
+    /// RefCount - The known minimum number of reference count increments.
+    unsigned RefCount;
+
+    /// Seq - The current position in the sequence.
+    Sequence Seq;
+
+  public:
+    /// RRI - Unidirectional information about the current sequence.
+    /// TODO: Encapsulate this better.
+    RRInfo RRI;
+
+    PtrState() : RefCount(0), Seq(S_None) {}
+
+    /// Count one more known increment, saturating at UINT_MAX.
+    void IncrementRefCount() {
+      if (RefCount == UINT_MAX)
+        return;
+      ++RefCount;
+    }
+
+    /// Count one fewer known increment, never going below zero.
+    void DecrementRefCount() {
+      if (RefCount == 0)
+        return;
+      --RefCount;
+    }
+
+    /// Forget all known increments.
+    void ClearRefCount() {
+      RefCount = 0;
+    }
+
+    /// True if at least one increment is known.
+    bool IsKnownIncremented() const {
+      return RefCount != 0;
+    }
+
+    void SetSeq(Sequence NewSeq) {
+      Seq = NewSeq;
+    }
+
+    /// Record an objc_release carrying the (possibly null) imprecise-release
+    /// metadata M.
+    void SetSeqToRelease(MDNode *M) {
+      // A release starting from S_None or S_Use is movable exactly when it
+      // carries metadata.
+      if (Seq == S_None || Seq == S_Use) {
+        RRI.ReleaseMetadata = M;
+        Seq = M ? S_MovableRelease : S_Release;
+        return;
+      }
+
+      // An already movable release with the same metadata stays as it is.
+      if (Seq == S_MovableRelease && RRI.ReleaseMetadata == M)
+        return;
+
+      // Anything else degrades to a plain release without metadata.
+      Seq = S_Release;
+      RRI.ReleaseMetadata = 0;
+    }
+
+    Sequence GetSeq() const {
+      return Seq;
+    }
+
+    /// Abandon the current sequence along with everything recorded for it.
+    void ClearSequenceProgress() {
+      RRI.clear();
+      Seq = S_None;
+    }
+
+    /// Merge the state arriving from another path into this one.
+    void Merge(const PtrState &Other, bool TopDown);
+  };
+}
+
+/// Merge - Fold the state Other, arriving along a different path, into this
+/// state. TopDown is forwarded to MergeSeqs to select the merge direction.
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+  // Only the increments known on both paths can be relied upon.
+  RefCount = std::min(RefCount, Other.RefCount);
+
+  Sequence Merged = MergeSeqs(Seq, Other.Seq, TopDown);
+
+  // We can't merge a plain objc_retain with an objc_retainBlock.
+  if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+    Merged = S_None;
+
+  Seq = Merged;
+
+  // With no sequence left, there is nothing worth remembering.
+  if (Merged == S_None) {
+    RRI.clear();
+    return;
+  }
+
+  // Conservatively merge the ReleaseMetadata information: it survives only
+  // if both sides agree on it.
+  if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+    RRI.ReleaseMetadata = 0;
+
+  // Flags hold only if they hold on both paths; the sets are unioned.
+  RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+  RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+  RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
+                              Other.RRI.ReverseInsertPts.end());
+  RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+}
+
+namespace {
+  /// BBState - Per-BasicBlock state: path counts in both traversal directions
+  /// and, for each direction, a map from pointer to PtrState.
+  class BBState {
+    /// TopDownPathCount - The number of unique control paths from the entry
+    /// which can reach this block.
+    unsigned TopDownPathCount;
+
+    /// BottomUpPathCount - The number of unique control paths to exits
+    /// from this block.
+    unsigned BottomUpPathCount;
+
+    /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
+    typedef MapVector<const Value *, PtrState> MapTy;
+
+    /// PerPtrTopDown - The top-down traversal uses this to record information
+    /// known about a pointer at the bottom of each block.
+    MapTy PerPtrTopDown;
+
+    /// PerPtrBottomUp - The bottom-up traversal uses this to record information
+    /// known about a pointer at the top of each block.
+    MapTy PerPtrBottomUp;
+
+  public:
+    BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+    typedef MapTy::iterator ptr_iterator;
+    typedef MapTy::const_iterator ptr_const_iterator;
+
+    // Iteration over the top-down per-pointer states.
+    ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+    ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+    ptr_const_iterator top_down_ptr_begin() const {
+      return PerPtrTopDown.begin();
+    }
+    ptr_const_iterator top_down_ptr_end() const {
+      return PerPtrTopDown.end();
+    }
+
+    // Iteration over the bottom-up per-pointer states.
+    ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+    ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+    ptr_const_iterator bottom_up_ptr_begin() const {
+      return PerPtrBottomUp.begin();
+    }
+    ptr_const_iterator bottom_up_ptr_end() const {
+      return PerPtrBottomUp.end();
+    }
+
+    /// SetAsEntry - Mark this block as being an entry block, which has one
+    /// path from the entry by definition.
+    void SetAsEntry() { TopDownPathCount = 1; }
+
+    /// SetAsExit - Mark this block as being an exit block, which has one
+    /// path to an exit by definition.
+    void SetAsExit()  { BottomUpPathCount = 1; }
+
+    /// getPtrTopDownState - Return the top-down state for Arg, creating a
+    /// default-constructed entry if there is none yet.
+    PtrState &getPtrTopDownState(const Value *Arg) {
+      return PerPtrTopDown[Arg];
+    }
+
+    /// getPtrBottomUpState - Return the bottom-up state for Arg, creating a
+    /// default-constructed entry if there is none yet.
+    PtrState &getPtrBottomUpState(const Value *Arg) {
+      return PerPtrBottomUp[Arg];
+    }
+
+    /// clearBottomUpPointers - Drop all bottom-up per-pointer states. The
+    /// top-down states are left alone.
+    void clearBottomUpPointers() {
+      PerPtrBottomUp.clear();
+    }
+
+    /// clearTopDownPointers - Drop all top-down per-pointer states. The
+    /// bottom-up states are left alone.
+    void clearTopDownPointers() {
+      PerPtrTopDown.clear();
+    }
+
+    void InitFromPred(const BBState &Other);
+    void InitFromSucc(const BBState &Other);
+    void MergePred(const BBState &Other);
+    void MergeSucc(const BBState &Other);
+
+    /// GetAllPathCount - Return the number of possible unique paths from an
+    /// entry to an exit which pass through this block. This is only valid
+    /// after both the top-down and bottom-up traversals are complete.
+    unsigned GetAllPathCount() const {
+      return TopDownPathCount * BottomUpPathCount;
+    }
+  };
+}
+
+/// InitFromPred - Start this block's top-down state as a copy of the
+/// top-down state of the predecessor Other.
+void BBState::InitFromPred(const BBState &Other) {
+  TopDownPathCount = Other.TopDownPathCount;
+  PerPtrTopDown = Other.PerPtrTopDown;
+}
+
+/// InitFromSucc - Start this block's bottom-up state as a copy of the
+/// bottom-up state of the successor Other.
+void BBState::InitFromSucc(const BBState &Other) {
+  BottomUpPathCount = Other.BottomUpPathCount;
+  PerPtrBottomUp = Other.PerPtrBottomUp;
+}
+
+/// MergePred - The top-down traversal uses this to merge information about
+/// predecessors to form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+  // Other.TopDownPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  TopDownPathCount += Other.TopDownPathCount;
+
+  // Pass 1: walk the other set. A key we already have is merged with the
+  // other side's state; a key we lack is copied in and then merged with an
+  // empty state.
+  for (ptr_const_iterator OI = Other.top_down_ptr_begin(),
+       OE = Other.top_down_ptr_end(); OI != OE; ++OI) {
+    std::pair<ptr_iterator, bool> Slot = PerPtrTopDown.insert(*OI);
+    PtrState &Mine = Slot.first->second;
+    const bool NewlyInserted = Slot.second;
+    if (NewlyInserted)
+      Mine.Merge(PtrState(), /*TopDown=*/true);
+    else
+      Mine.Merge(OI->second, /*TopDown=*/true);
+  }
+
+  // Pass 2: walk our own set. Every key the other set doesn't have is forced
+  // to merge with an empty state.
+  for (ptr_iterator MI = top_down_ptr_begin(),
+       ME = top_down_ptr_end(); MI != ME; ++MI) {
+    if (Other.PerPtrTopDown.find(MI->first) != Other.PerPtrTopDown.end())
+      continue;
+    MI->second.Merge(PtrState(), /*TopDown=*/true);
+  }
+}
+
+/// MergeSucc - The bottom-up traversal uses this to merge information about
+/// successors to form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+  // loop backedge. Loop backedges are special.
+  BottomUpPathCount += Other.BottomUpPathCount;
+
+  // Pass 1: walk the other set. A key we already have is merged with the
+  // other side's state; a key we lack is copied in and then merged with an
+  // empty state.
+  for (ptr_const_iterator OI = Other.bottom_up_ptr_begin(),
+       OE = Other.bottom_up_ptr_end(); OI != OE; ++OI) {
+    std::pair<ptr_iterator, bool> Slot = PerPtrBottomUp.insert(*OI);
+    PtrState &Mine = Slot.first->second;
+    const bool NewlyInserted = Slot.second;
+    if (NewlyInserted)
+      Mine.Merge(PtrState(), /*TopDown=*/false);
+    else
+      Mine.Merge(OI->second, /*TopDown=*/false);
+  }
+
+  // Pass 2: walk our own set. Every key the other set doesn't have is forced
+  // to merge with an empty state.
+  for (ptr_iterator MI = bottom_up_ptr_begin(),
+       ME = bottom_up_ptr_end(); MI != ME; ++MI) {
+    if (Other.PerPtrBottomUp.find(MI->first) != Other.PerPtrBottomUp.end())
+      continue;
+    MI->second.Merge(PtrState(), /*TopDown=*/false);
+  }
+}
+
+namespace {
+  /// ObjCARCOpt - The main ARC optimization pass.
+  class ObjCARCOpt : public FunctionPass {
+    /// Changed - NOTE(review): presumably records whether the current run
+    /// modified the function; its uses are outside this declaration.
+    bool Changed;
+
+    /// PA - The ObjC-specific pointer provenance analysis used by the
+    /// dependence queries.
+    ProvenanceAnalysis PA;
+
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// RetainFunc, ReleaseFunc, etc. - Declarations for objc_retain,
+    /// objc_retainBlock, and objc_release.
+    Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+    /// RetainRVCallee, etc. - Declarations for ObjC runtime
+    /// functions, for use in creating calls to them. These are initialized
+    /// lazily to avoid cluttering up the Module with unused declarations.
+    Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+             *RetainCallee, *AutoreleaseCallee;
+
+    /// UsedInThisFunction - Flags which determine whether each of the
+    /// interesting runtime functions is in fact used in the current function.
+    unsigned UsedInThisFunction;
+
+    /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+    /// metadata.
+    unsigned ImpreciseReleaseMDKind;
+
+    // Lazy accessors for the runtime function declarations above; each one
+    // creates the declaration in M on first use and caches it.
+    Constant *getRetainRVCallee(Module *M);
+    Constant *getAutoreleaseRVCallee(Module *M);
+    Constant *getReleaseCallee(Module *M);
+    Constant *getRetainCallee(Module *M);
+    Constant *getAutoreleaseCallee(Module *M);
+
+    // Optimizations that look at one runtime call at a time.
+    void OptimizeRetainCall(Function &F, Instruction *Retain);
+    bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+    void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+    void OptimizeIndividualCalls(Function &F);
+
+    // The bottom-up and top-down traversals, which fill in the per-block
+    // states and collect the retain and release calls they encounter.
+    void CheckForCFGHazards(const BasicBlock *BB,
+                            DenseMap<const BasicBlock *, BBState> &BBStates,
+                            BBState &MyStates) const;
+    bool VisitBottomUp(BasicBlock *BB,
+                       DenseMap<const BasicBlock *, BBState> &BBStates,
+                       MapVector<Value *, RRInfo> &Retains);
+    bool VisitTopDown(BasicBlock *BB,
+                      DenseMap<const BasicBlock *, BBState> &BBStates,
+                      DenseMap<Value *, RRInfo> &Releases);
+    bool Visit(Function &F,
+               DenseMap<const BasicBlock *, BBState> &BBStates,
+               MapVector<Value *, RRInfo> &Retains,
+               DenseMap<Value *, RRInfo> &Releases);
+
+    // Code motion driven by the results of the traversals.
+    void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+                   MapVector<Value *, RRInfo> &Retains,
+                   DenseMap<Value *, RRInfo> &Releases,
+                   SmallVectorImpl<Instruction *> &DeadInsts);
+
+    bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+                              MapVector<Value *, RRInfo> &Retains,
+                              DenseMap<Value *, RRInfo> &Releases);
+
+    void OptimizeWeakCalls(Function &F);
+
+    bool OptimizeSequences(Function &F);
+
+    void OptimizeReturns(Function &F);
+
+    // FunctionPass hooks.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+    virtual void releaseMemory();
+
+  public:
+    /// ID - Pass identifier handed to the FunctionPass constructor.
+    static char ID;
+
+    /// Construct the pass and make sure it is registered with the global
+    /// PassRegistry.
+    ObjCARCOpt() : FunctionPass(ID) {
+      initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+// Storage for the pass identifier, followed by the registration of the pass
+// under the name "objc-arc" with a declared dependency on
+// ObjCARCAliasAnalysis.
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+                      "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+                    "objc-arc", "ObjC ARC optimization", false, false)
+
+/// createObjCARCOptPass - Allocate a new ObjCARCOpt pass object and return it
+/// as a generic Pass pointer.
+Pass *llvm::createObjCARCOptPass() {
+  Pass *Opt = new ObjCARCOpt();
+  return Opt;
+}
+
+/// getAnalysisUsage - Declare that this pass requires ObjCARCAliasAnalysis
+/// and AliasAnalysis, and that it leaves the CFG unchanged.
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<ObjCARCAliasAnalysis>();
+  AU.addRequired<AliasAnalysis>();
+  // ARC optimization doesn't currently split critical edges.
+  AU.setPreservesCFG();
+}
+
+/// getRetainRVCallee - Return the declaration of
+/// objc_retainAutoreleasedReturnValue, typed i8* (i8*) and marked nounwind.
+/// It is created in M on first use and cached in RetainRVCallee afterwards.
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+  if (!RetainRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr hands back the extended list
+    // instead of modifying its receiver, so the result has to be kept.
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    RetainRVCallee =
+      M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+                             Attributes);
+  }
+  return RetainRVCallee;
+}
+
+/// getAutoreleaseRVCallee - Return the declaration of
+/// objc_autoreleaseReturnValue, typed i8* (i8*) and marked nounwind. It is
+/// created in M on first use and cached in AutoreleaseRVCallee afterwards.
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+  if (!AutoreleaseRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr hands back the extended list
+    // instead of modifying its receiver, so the result has to be kept.
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    AutoreleaseRVCallee =
+      M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+                             Attributes);
+  }
+  return AutoreleaseRVCallee;
+}
+
+/// getReleaseCallee - Return the declaration of objc_release, typed
+/// void (i8*) and marked nounwind. It is created in M on first use and
+/// cached in ReleaseCallee afterwards.
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+  if (!ReleaseCallee) {
+    LLVMContext &C = M->getContext();
+    std::vector<Type *> Params;
+    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+    // Attribute lists are immutable: addAttr hands back the extended list
+    // instead of modifying its receiver, so the result has to be kept.
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    ReleaseCallee =
+      M->getOrInsertFunction(
+        "objc_release",
+        FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return ReleaseCallee;
+}
+
+/// getRetainCallee - Return the declaration of objc_retain, typed i8* (i8*)
+/// and marked nounwind. It is created in M on first use and cached in
+/// RetainCallee afterwards.
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+  if (!RetainCallee) {
+    LLVMContext &C = M->getContext();
+    std::vector<Type *> Params;
+    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+    // Attribute lists are immutable: addAttr hands back the extended list
+    // instead of modifying its receiver, so the result has to be kept.
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    // The return type is the same i8* as the single parameter.
+    RetainCallee =
+      M->getOrInsertFunction(
+        "objc_retain",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return RetainCallee;
+}
+
+/// getAutoreleaseCallee - Return the declaration of objc_autorelease, typed
+/// i8* (i8*) and marked nounwind. It is created in M on first use and cached
+/// in AutoreleaseCallee afterwards.
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+  if (!AutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    std::vector<Type *> Params;
+    Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+    // Attribute lists are immutable: addAttr hands back the extended list
+    // instead of modifying its receiver, so the result has to be kept.
+    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind);
+    // The return type is the same i8* as the single parameter.
+    AutoreleaseCallee =
+      M->getOrInsertFunction(
+        "objc_autorelease",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return AutoreleaseCallee;
+}
+
+/// CanAlterRefCount - Test whether the given instruction can result in a
+/// reference count modification (positive or negative) for the pointer's
+/// object. Class is the already computed classification of Inst.
+static bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+                 ProvenanceAnalysis &PA, InstructionClass Class) {
+  // Autoreleases and plain users never directly modify a reference count.
+  if (Class == IC_Autorelease || Class == IC_AutoreleaseRV ||
+      Class == IC_User)
+    return false;
+
+  ImmutableCallSite CS = static_cast<const Value *>(Inst);
+  assert(CS && "Only calls can alter reference counts!");
+
+  // Let AliasAnalysis describe the memory behavior of the call.
+  AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+
+  // A call that only reads memory cannot change a count.
+  if (AliasAnalysis::onlyReadsMemory(MRB))
+    return false;
+
+  // Without a guarantee that only argument pointees are touched, assume the
+  // worst.
+  if (!AliasAnalysis::onlyAccessesArgPointees(MRB))
+    return true;
+
+  // Only the arguments' pointees are accessed, so the call matters only if
+  // one of its arguments may be related to Ptr.
+  for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
+       AE = CS.arg_end(); AI != AE; ++AI) {
+    const Value *Arg = *AI;
+    if (IsPotentialUse(Arg) && PA.related(Ptr, Arg))
+      return true;
+  }
+  return false;
+}
+
+/// CanUse - Test whether the given instruction can "use" the given pointer's
+/// object in a way that requires the reference count to be positive. Class
+/// is the already computed classification of Inst.
+static bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+       InstructionClass Class) {
+  // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+  if (Class == IC_Call)
+    return false;
+
+  // Some instructions take pointer operands without "using" them.
+  if (const ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) {
+    // Comparing a pointer with null, or any other constant, isn't really a
+    // use, because we don't care what the pointer points to, or about the
+    // values of any other dynamic reference-counted pointers. Other
+    // comparisons fall through to the generic operand scan at the bottom.
+    if (!IsPotentialUse(Cmp->getOperand(1)))
+      return false;
+  } else {
+    if (ImmutableCallSite Call = static_cast<const Value *>(Inst)) {
+      // For calls, just check the arguments (and not the callee operand).
+      for (ImmutableCallSite::arg_iterator AI = Call.arg_begin(),
+           AE = Call.arg_end(); AI != AE; ++AI) {
+        const Value *Arg = *AI;
+        if (IsPotentialUse(Arg) && PA.related(Ptr, Arg))
+          return true;
+      }
+      return false;
+    }
+
+    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
+      // Stores are special: the stored value is irrelevant, only the
+      // address being stored to counts.
+      const Value *Addr = GetUnderlyingObjCPtr(Store->getPointerOperand());
+      // If we can't tell what the underlying object was, assume there is a
+      // dependence.
+      return IsPotentialUse(Addr) && PA.related(Addr, Ptr);
+    }
+  }
+
+  // Generic case: any operand that may be related to Ptr is a use.
+  for (User::const_op_iterator OpI = Inst->op_begin(), OpE = Inst->op_end();
+       OpI != OpE; ++OpI) {
+    const Value *Operand = *OpI;
+    if (IsPotentialUse(Operand) && PA.related(Ptr, Operand))
+      return true;
+  }
+  return false;
+}
+
+/// CanInterruptRV - Test whether the given instruction can autorelease
+/// any pointer or cause an autoreleasepool pop. The decision is made purely
+/// from the instruction's classification.
+static bool
+CanInterruptRV(InstructionClass Class) {
+  // Pool pops, arbitrary calls, and every flavor of autorelease interrupt.
+  const bool Interrupts =
+    Class == IC_AutoreleasepoolPop ||
+    Class == IC_CallOrUser ||
+    Class == IC_Call ||
+    Class == IC_Autorelease ||
+    Class == IC_AutoreleaseRV ||
+    Class == IC_FusedRetainAutorelease ||
+    Class == IC_FusedRetainAutoreleaseRV;
+  return Interrupts;
+}
+
+namespace {
+  /// DependenceKind - There are several kinds of dependence-like concepts in
+  /// use here. Depends() switches on this value to pick the matching test.
+  enum DependenceKind {
+    NeedsPositiveRetainCount,   ///< Tested with CanUse.
+    CanChangeRetainCount,       ///< Tested with CanAlterRefCount.
+    RetainAutoreleaseDep,       ///< Blocks objc_retainAutorelease.
+    RetainAutoreleaseRVDep,     ///< Blocks objc_retainAutoreleaseReturnValue.
+    RetainRVDep                 ///< Blocks objc_retainAutoreleasedReturnValue.
+  };
+}
+
+/// Depends - Test if there can be dependencies on Inst through Arg. This
+/// function only tests dependencies relevant for removing pairs of calls.
+///
+/// Flavor selects which kind of dependence is being asked about. Returns
+/// true if Inst is such a dependence, and also when Inst is Arg itself.
+static bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+        ProvenanceAnalysis &PA) {
+  // If we've reached the definition of Arg, stop.
+  if (Inst == Arg)
+    return true;
+
+  switch (Flavor) {
+  case NeedsPositiveRetainCount: {
+    // Pool push/pop and unclassified instructions never count as uses;
+    // everything else is handed to CanUse.
+    InstructionClass Class = GetInstructionClass(Inst);
+    switch (Class) {
+    case IC_AutoreleasepoolPop:
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+      return false;
+    default:
+      return CanUse(Inst, Arg, PA, Class);
+    }
+  }
+
+  case CanChangeRetainCount: {
+    InstructionClass Class = GetInstructionClass(Inst);
+    switch (Class) {
+    case IC_AutoreleasepoolPop:
+      // Conservatively assume this can decrement any count.
+      return true;
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+      return false;
+    default:
+      return CanAlterRefCount(Inst, Arg, PA, Class);
+    }
+  }
+
+  case RetainAutoreleaseDep:
+    // Note: this flavor and the two below classify with
+    // GetBasicInstructionClass rather than GetInstructionClass.
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_AutoreleasepoolPop:
+      // Don't merge an objc_autorelease with an objc_retain inside a different
+      // autoreleasepool scope.
+      return true;
+    case IC_Retain:
+    case IC_RetainRV:
+      // Check for a retain of the same pointer for merging.
+      return GetObjCArg(Inst) == Arg;
+    default:
+      // Nothing else matters for objc_retainAutorelease formation.
+      return false;
+    }
+    break;
+
+  case RetainAutoreleaseRVDep: {
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    switch (Class) {
+    case IC_Retain:
+    case IC_RetainRV:
+      // Check for a retain of the same pointer for merging.
+      return GetObjCArg(Inst) == Arg;
+    default:
+      // Anything that can autorelease interrupts
+      // retainAutoreleaseReturnValue formation.
+      return CanInterruptRV(Class);
+    }
+    break;
+  }
+
+  case RetainRVDep:
+    // Arg plays no role here beyond the identity check at the top.
+    return CanInterruptRV(GetBasicInstructionClass(Inst));
+  }
+
+  // Every enumerator returns above; the return below only follows the
+  // unreachable marker.
+  llvm_unreachable("Invalid dependence flavor");
+  return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartInst (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+                 const Value *Arg,
+                 BasicBlock *StartBB, Instruction *StartInst,
+                 SmallPtrSet<Instruction *, 4> &DependingInstructions,
+                 SmallPtrSet<const BasicBlock *, 4> &Visited,
+                 ProvenanceAnalysis &PA) {
+  BasicBlock::iterator StartPos = StartInst;
+
+  SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+  Worklist.push_back(std::make_pair(StartBB, StartPos));
+  do {
+    std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+      Worklist.pop_back_val();
+    BasicBlock *LocalStartBB = Pair.first;
+    BasicBlock::iterator LocalStartPos = Pair.second;
+    BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+    for (;;) { // Scan this block backwards from LocalStartPos.
+      if (LocalStartPos == StartBBBegin) {
+        pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+        if (PI == PE)
+          // If we've reached the function entry, produce a null dependence.
+          DependingInstructions.insert(0);
+        else
+          // Add the predecessors to the worklist.
+          do {
+            BasicBlock *PredBB = *PI;
+            if (Visited.insert(PredBB)) // Queue each predecessor only once.
+              Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+          } while (++PI != PE);
+        break;
+      }
+
+      Instruction *Inst = --LocalStartPos;
+      if (Depends(Flavor, Inst, Arg, PA)) {
+        DependingInstructions.insert(Inst);
+        break; // The first dependence found ends the walk along this path.
+      }
+    }
+  } while (!Worklist.empty());
+
+  // Determine whether the original StartBB post-dominates all of the blocks we
+  // visited. If not, insert a sentinel indicating that most optimizations are
+  // not safe.
+  for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+       E = Visited.end(); I != E; ++I) {
+    const BasicBlock *BB = *I;
+    if (BB == StartBB)
+      continue;
+    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+      const BasicBlock *Succ = *SI;
+      if (Succ != StartBB && !Visited.count(Succ)) { // Edge leaves the region.
+        DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+        return;
+      }
+    }
+  }
+}
+
+static bool isNullOrUndef(const Value *V) {
+  return isa<UndefValue>(V) || isa<ConstantPointerNull>(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+    return GEP->hasAllZeroIndices(); // A GEP is a no-op only if all-zero.
+  return isa<BitCastInst>(I);
+}
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  CallSite CS(GetObjCArg(Retain));
+  Instruction *Call = CS.getInstruction();
+  if (!Call) return; // The retained operand is not a call site.
+  if (Call->getParent() != Retain->getParent()) return;
+
+  // Check that only no-op instructions separate the call from the retain.
+  BasicBlock::iterator I = Call;
+  ++I;
+  while (isNoopInstruction(I)) ++I;
+  if (&*I != Retain)
+    return;
+
+  // Turn it into an objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value.  Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+  // Leave the call alone if its argument is an immediately preceding call.
+  Value *Arg = GetObjCArg(RetainRV);
+  CallSite CS(Arg);
+  if (Instruction *Call = CS.getInstruction())
+    if (Call->getParent() == RetainRV->getParent()) {
+      BasicBlock::iterator I = Call;
+      ++I;
+      while (isNoopInstruction(I)) ++I; // Skip no-op casts and GEPs.
+      if (&*I == RetainRV)
+        return false;
+    }
+
+  // Check for being preceded by an objc_autoreleaseReturnValue on the same
+  // pointer. In this case, we can delete the pair.
+  BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+  if (I != Begin) {
+    do --I; while (I != Begin && isNoopInstruction(I));
+    if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+        GetObjCArg(I) == Arg) {
+      Changed = true;
+      ++NumPeeps;
+      EraseInstruction(I);
+      EraseInstruction(RetainRV);
+      return true; // RetainRV no longer exists.
+    }
+  }
+
+  // Turn it into a plain objc_retain.
+  Changed = true;
+  ++NumPeeps;
+  cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+  return false;
+}
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+  // Keep the RV form if the pointer is returned or has a RetainRV-class user.
+  const Value *Ptr = GetObjCArg(AutoreleaseRV);
+  for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+       UI != UE; ++UI) {
+    const User *I = *UI;
+    if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+      return;
+  }
+
+  Changed = true; // No such user was found: demote to a plain autorelease.
+  ++NumPeeps;
+  cast<CallInst>(AutoreleaseRV)->
+    setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+  // Reset all the flags in preparation for recomputing them.
+  UsedInThisFunction = 0;
+
+  // Visit all objc_* calls in F.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++; // Advance first; Inst may be erased below.
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    switch (Class) {
+    default: break;
+
+    // Delete no-op casts. These function calls have special semantics, but
+    // the semantics are entirely implemented via lowering in the front-end,
+    // so by the time they reach the optimizer, they are just no-op calls
+    // which return their argument.
+    //
+    // There are gray areas here, as the ability to cast reference-counted
+    // pointers to raw void* and back allows code to break ARC assumptions,
+    // however these are currently considered to be unimportant.
+    case IC_NoopCast:
+      Changed = true;
+      ++NumNoops;
+      EraseInstruction(Inst);
+      continue;
+
+    // If the pointer-to-weak-pointer is null, it's undefined behavior.
+    case IC_StoreWeak:
+    case IC_LoadWeak:
+    case IC_LoadWeakRetained:
+    case IC_InitWeak:
+    case IC_DestroyWeak: {
+      CallInst *CI = cast<CallInst>(Inst);
+      if (isNullOrUndef(CI->getArgOperand(0))) {
+        Changed = true; // The call is replaced by a store of undef to null.
+        const Type *Ty = CI->getArgOperand(0)->getType();
+        new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+                      Constant::getNullValue(Ty), CI);
+        CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+        CI->eraseFromParent();
+        continue;
+      }
+      break;
+    }
+    case IC_CopyWeak:
+    case IC_MoveWeak: {
+      CallInst *CI = cast<CallInst>(Inst);
+      if (isNullOrUndef(CI->getArgOperand(0)) ||
+          isNullOrUndef(CI->getArgOperand(1))) {
+        Changed = true; // The call is replaced by a store of undef to null.
+        const Type *Ty = CI->getArgOperand(0)->getType();
+        new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+                      Constant::getNullValue(Ty), CI);
+        CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+        CI->eraseFromParent();
+        continue;
+      }
+      break;
+    }
+    case IC_Retain:
+      OptimizeRetainCall(F, Inst);
+      break;
+    case IC_RetainRV:
+      if (OptimizeRetainRVCall(F, Inst))
+        continue; // The call was deleted as part of a pair.
+      break;
+    case IC_AutoreleaseRV:
+      OptimizeAutoreleaseRVCall(F, Inst);
+      break;
+    }
+
+    // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+    if (IsAutorelease(Class) && Inst->use_empty()) {
+      CallInst *Call = cast<CallInst>(Inst);
+      const Value *Arg = Call->getArgOperand(0);
+      Arg = FindSingleUseIdentifiedObject(Arg);
+      if (Arg) {
+        Changed = true;
+        ++NumAutoreleases;
+
+        // Create the declaration lazily.
+        LLVMContext &C = Inst->getContext();
+        CallInst *NewCall =
+          CallInst::Create(getReleaseCallee(F.getParent()),
+                           Call->getArgOperand(0), "", Call);
+        NewCall->setMetadata(ImpreciseReleaseMDKind,
+                             MDNode::get(C, ArrayRef<Value *>()));
+        EraseInstruction(Call);
+        Inst = NewCall; // Continue below with the replacement release.
+        Class = IC_Release;
+      }
+    }
+
+    // For functions which can never be passed stack arguments, add
+    // a tail keyword.
+    if (IsAlwaysTail(Class)) {
+      Changed = true;
+      cast<CallInst>(Inst)->setTailCall();
+    }
+
+    // Set nounwind as needed.
+    if (IsNoThrow(Class)) {
+      Changed = true;
+      cast<CallInst>(Inst)->setDoesNotThrow();
+    }
+
+    if (!IsNoopOnNull(Class)) {
+      UsedInThisFunction |= 1 << Class;
+      continue;
+    }
+
+    const Value *Arg = GetObjCArg(Inst);
+
+    // ARC calls with null are no-ops. Delete them.
+    if (isNullOrUndef(Arg)) {
+      Changed = true;
+      ++NumNoops;
+      EraseInstruction(Inst);
+      continue;
+    }
+
+    // Keep track of which of retain, release, autorelease, and retain_block
+    // are actually present in this function.
+    UsedInThisFunction |= 1 << Class;
+
+    // If Arg is a PHI, and one or more incoming values to the
+    // PHI are null, and the call is control-equivalent to the PHI, and there
+    // are no relevant side effects between the PHI and the call, the call
+    // could be pushed up to just those paths with non-null incoming values.
+    // For now, don't bother splitting critical edges for this.
+    SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+    Worklist.push_back(std::make_pair(Inst, Arg));
+    do {
+      std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+      Inst = Pair.first;
+      Arg = Pair.second;
+
+      const PHINode *PN = dyn_cast<PHINode>(Arg);
+      if (!PN) continue;
+
+      // Determine if the PHI has any null operands, or any incoming
+      // critical edges.
+      bool HasNull = false;
+      bool HasCriticalEdges = false;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+        Value *Incoming =
+          StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+        if (isNullOrUndef(Incoming))
+          HasNull = true;
+        else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+                   .getNumSuccessors() != 1) {
+          HasCriticalEdges = true;
+          break;
+        }
+      }
+      // If we have null operands and no critical edges, optimize.
+      if (!HasCriticalEdges && HasNull) {
+        SmallPtrSet<Instruction *, 4> DependingInstructions;
+        SmallPtrSet<const BasicBlock *, 4> Visited;
+
+        // Check that there is nothing that cares about the reference
+        // count between the call and the phi.
+        FindDependencies(NeedsPositiveRetainCount, Arg,
+                         Inst->getParent(), Inst,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() == 1 &&
+            *DependingInstructions.begin() == PN) {
+          Changed = true;
+          ++NumPartialNoops;
+          // Clone the call into each predecessor that has a non-null value.
+          CallInst *CInst = cast<CallInst>(Inst);
+          const Type *ParamTy = CInst->getArgOperand(0)->getType();
+          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+            Value *Incoming =
+              StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+            if (!isNullOrUndef(Incoming)) {
+              CallInst *Clone = cast<CallInst>(CInst->clone());
+              Value *Op = PN->getIncomingValue(i);
+              Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+              if (Op->getType() != ParamTy)
+                Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+              Clone->setArgOperand(0, Op);
+              Clone->insertBefore(InsertPos);
+              // The incoming value may itself be a PHI; revisit the clone.
+              Worklist.push_back(std::make_pair(Clone, Incoming));
+            }
+          }
+          // Erase the original call.
+          EraseInstruction(CInst);
+          continue;
+        }
+      }
+    } while (!Worklist.empty());
+  }
+}
+
+/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
+/// control flow, or other CFG structures where moving code across the edge
+/// would result in it being executed more.
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+                               DenseMap<const BasicBlock *, BBState> &BBStates,
+                               BBState &MyStates) const {
+  // If any top-down local-use or possible-dec has a succ which is earlier in
+  // the sequence, forget it.
+  for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+       E = MyStates.top_down_ptr_end(); I != E; ++I)
+    switch (I->second.GetSeq()) {
+    default: break;
+    case S_Use: {
+      const Value *Arg = I->first;
+      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+      bool SomeSuccHasSame = false;
+      bool AllSuccsHaveSame = true;
+      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+        switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+        case S_None:
+        case S_CanRelease:
+          MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+          SomeSuccHasSame = false;
+          break;
+        case S_Use:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+          AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      // If any successor's bottom-up state matches the current state, require
+      // all of them to match. This guards against loops mid-sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+      break; // Do not fall into the S_CanRelease check below.
+    }
+    case S_CanRelease: {
+      const Value *Arg = I->first;
+      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+      bool SomeSuccHasSame = false;
+      bool AllSuccsHaveSame = true;
+      for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+        switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+        case S_None:
+          MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+          SomeSuccHasSame = false;
+          break;
+        case S_CanRelease:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+        case S_Use:
+          AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      // If any successor's bottom-up state matches the current state, require
+      // all of them to match. This guards against loops mid-sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+      break;
+    }
+    }
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+                          DenseMap<const BasicBlock *, BBState> &BBStates,
+                          MapVector<Value *, RRInfo> &Retains) {
+  bool NestingDetected = false; // Set on back-to-back releases; returned.
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each successor to compute the initial state
+  // for the current block.
+  const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+  succ_const_iterator SI(TI), SE(TI, false);
+  if (SI == SE)
+    MyStates.SetAsExit();
+  else
+    do {
+      const BasicBlock *Succ = *SI++;
+      if (Succ == BB)
+        continue; // Ignore self-loop edges.
+      DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+      if (I == BBStates.end())
+        continue; // No state has been recorded for this successor.
+      MyStates.InitFromSucc(I->second);
+      while (SI != SE) {
+        Succ = *SI++;
+        if (Succ != BB) {
+          I = BBStates.find(Succ);
+          if (I != BBStates.end())
+            MyStates.MergeSucc(I->second);
+        }
+      }
+      break;
+    } while (SI != SE);
+
+  // Visit all the instructions, bottom-up.
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+    Instruction *Inst = llvm::prior(I);
+    InstructionClass Class = GetInstructionClass(Inst);
+    const Value *Arg = 0;
+
+    switch (Class) {
+    case IC_Release: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+      // Check whether we see two releases in a row on the same pointer. If
+      // so, make a note, and we'll circle back to revisit it after we've
+      // hopefully eliminated the second release, which may allow us to
+      // eliminate the first release too.
+      // Theoretically we could implement removal of nested retain+release
+      // pairs by making PtrState hold a stack of states, but this is
+      // simple and avoids adding overhead for the non-nested case.
+      if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+        NestingDetected = true;
+
+      S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+      S.RRI.clear();
+      S.RRI.KnownIncremented = S.IsKnownIncremented();
+      S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+      S.RRI.Calls.insert(Inst);
+
+      S.IncrementRefCount();
+      break;
+    }
+    case IC_RetainBlock:
+    case IC_Retain:
+    case IC_RetainRV: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrBottomUpState(Arg);
+      S.DecrementRefCount();
+
+      switch (S.GetSeq()) {
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+      case S_Use:
+        S.RRI.ReverseInsertPts.clear();
+        // FALL THROUGH
+      case S_CanRelease:
+        // Don't do retain+release tracking for IC_RetainRV, because it's
+        // better to let it remain as the first instruction after a call.
+        if (Class != IC_RetainRV) {
+          S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+          Retains[Inst] = S.RRI; // Record what this retain was matched with.
+        }
+        S.ClearSequenceProgress();
+        break;
+      case S_None:
+        break;
+      case S_Retain:
+        llvm_unreachable("bottom-up pointer in retain state!");
+      }
+      break;
+    }
+    case IC_AutoreleasepoolPop:
+      // Conservatively, clear MyStates for all known pointers.
+      MyStates.clearBottomUpPointers();
+      continue;
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+      // These are irrelevant.
+      continue;
+    default:
+      break;
+    }
+
+    // Consider any other possible effects of this instruction on each
+    // pointer being tracked.
+    for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+         ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+      const Value *Ptr = MI->first;
+      if (Ptr == Arg)
+        continue; // Handled above.
+      PtrState &S = MI->second;
+      Sequence Seq = S.GetSeq(); // Snapshot; S may be updated below.
+
+      // Check for possible retains and releases.
+      if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+        // Check for a retain (we're going bottom-up here).
+        S.DecrementRefCount();
+
+        // Check for a release.
+        if (!IsRetain(Class) && Class != IC_RetainBlock)
+          switch (Seq) {
+          case S_Use:
+            S.SetSeq(S_CanRelease);
+            continue; // Proceeds to the next tracked pointer.
+          case S_CanRelease:
+          case S_Release:
+          case S_MovableRelease:
+          case S_Stop:
+          case S_None:
+            break;
+          case S_Retain:
+            llvm_unreachable("bottom-up pointer in retain state!");
+          }
+      }
+
+      // Check for possible direct uses.
+      switch (Seq) {
+      case S_Release:
+      case S_MovableRelease:
+        if (CanUse(Inst, Ptr, PA, Class)) {
+          S.RRI.ReverseInsertPts.clear();
+          S.RRI.ReverseInsertPts.insert(Inst);
+          S.SetSeq(S_Use);
+        } else if (Seq == S_Release &&
+                   (Class == IC_User || Class == IC_CallOrUser)) {
+          // Non-movable releases depend on any possible objc pointer use.
+          S.SetSeq(S_Stop);
+          S.RRI.ReverseInsertPts.clear();
+          S.RRI.ReverseInsertPts.insert(Inst);
+        }
+        break;
+      case S_Stop:
+        if (CanUse(Inst, Ptr, PA, Class))
+          S.SetSeq(S_Use);
+        break;
+      case S_CanRelease:
+      case S_Use:
+      case S_None:
+        break;
+      case S_Retain:
+        llvm_unreachable("bottom-up pointer in retain state!");
+      }
+    }
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+                         DenseMap<const BasicBlock *, BBState> &BBStates,
+                         DenseMap<Value *, RRInfo> &Releases) {
+  bool NestingDetected = false; // Set on back-to-back retains; returned.
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each predecessor to compute the initial state
+  // for the current block.
+  const_pred_iterator PI(BB), PE(BB, false);
+  if (PI == PE)
+    MyStates.SetAsEntry();
+  else
+    do {
+      const BasicBlock *Pred = *PI++;
+      if (Pred == BB)
+        continue; // Ignore self-loop edges.
+      DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+      if (I == BBStates.end())
+        continue; // No state has been recorded for this predecessor.
+      MyStates.InitFromPred(I->second);
+      while (PI != PE) {
+        Pred = *PI++;
+        if (Pred != BB) {
+          I = BBStates.find(Pred);
+          if (I != BBStates.end())
+            MyStates.MergePred(I->second);
+        }
+      }
+      break;
+    } while (PI != PE);
+
+  // Visit all the instructions, top-down.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+    Instruction *Inst = I;
+    InstructionClass Class = GetInstructionClass(Inst);
+    const Value *Arg = 0;
+
+    switch (Class) {
+    case IC_RetainBlock:
+    case IC_Retain:
+    case IC_RetainRV: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+      // Don't do retain+release tracking for IC_RetainRV, because it's
+      // better to let it remain as the first instruction after a call.
+      if (Class != IC_RetainRV) {
+        // Check whether we see two retains in a row on the same pointer. If
+        // so, make a note, and we'll circle back to revisit it after we've
+        // hopefully eliminated the second retain, which may allow us to
+        // eliminate the first retain too.
+        // Theoretically we could implement removal of nested retain+release
+        // pairs by making PtrState hold a stack of states, but this is
+        // simple and avoids adding overhead for the non-nested case.
+        if (S.GetSeq() == S_Retain)
+          NestingDetected = true;
+
+        S.SetSeq(S_Retain);
+        S.RRI.clear();
+        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+        S.RRI.KnownIncremented = S.IsKnownIncremented();
+        S.RRI.Calls.insert(Inst);
+      }
+
+      S.IncrementRefCount();
+      break;
+    }
+    case IC_Release: {
+      Arg = GetObjCArg(Inst);
+
+      PtrState &S = MyStates.getPtrTopDownState(Arg);
+      S.DecrementRefCount();
+
+      switch (S.GetSeq()) {
+      case S_Retain:
+      case S_CanRelease:
+        S.RRI.ReverseInsertPts.clear();
+        // FALL THROUGH
+      case S_Use:
+        S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+        S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+        Releases[Inst] = S.RRI; // Record what this release was matched with.
+        S.ClearSequenceProgress();
+        break;
+      case S_None:
+        break;
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+        llvm_unreachable("top-down pointer in release state!");
+      }
+      break;
+    }
+    case IC_AutoreleasepoolPop:
+      // Conservatively, clear MyStates for all known pointers.
+      MyStates.clearTopDownPointers();
+      continue;
+    case IC_AutoreleasepoolPush:
+    case IC_None:
+      // These are irrelevant.
+      continue;
+    default:
+      break;
+    }
+
+    // Consider any other possible effects of this instruction on each
+    // pointer being tracked.
+    for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+         ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+      const Value *Ptr = MI->first;
+      if (Ptr == Arg)
+        continue; // Handled above.
+      PtrState &S = MI->second;
+      Sequence Seq = S.GetSeq(); // Snapshot; S may be updated below.
+
+      // Check for possible releases.
+      if (!IsRetain(Class) && Class != IC_RetainBlock &&
+          CanAlterRefCount(Inst, Ptr, PA, Class)) {
+        // Account for the possible decrement.
+        S.DecrementRefCount();
+
+        // Advance the sequence if it was still waiting just after a retain.
+        switch (Seq) {
+        case S_Retain:
+          S.SetSeq(S_CanRelease);
+          S.RRI.ReverseInsertPts.clear();
+          S.RRI.ReverseInsertPts.insert(Inst);
+
+          // One call can't cause a transition from S_Retain to S_CanRelease
+          // and S_CanRelease to S_Use. If we've made the first transition,
+          // we're done.
+          continue;
+        case S_Use:
+        case S_CanRelease:
+        case S_None:
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+          llvm_unreachable("top-down pointer in release state!");
+        }
+      }
+
+      // Check for possible direct uses.
+      switch (Seq) {
+      case S_CanRelease:
+        if (CanUse(Inst, Ptr, PA, Class))
+          S.SetSeq(S_Use);
+        break;
+      case S_Use:
+      case S_Retain:
+      case S_None:
+        break;
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+        llvm_unreachable("top-down pointer in release state!");
+      }
+    }
+  }
+
+  CheckForCFGHazards(BB, BBStates, MyStates);
+  return NestingDetected;
+}
+
+// Visit - Visit F bottom-up, then top-down; true if both passes saw nesting.
+bool
+ObjCARCOpt::Visit(Function &F,
+                  DenseMap<const BasicBlock *, BBState> &BBStates,
+                  MapVector<Value *, RRInfo> &Retains,
+                  DenseMap<Value *, RRInfo> &Releases) {
+  // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+  // magically know that loops will be well behaved, i.e. they won't repeatedly
+  // call retain on a single pointer without doing a release.
+  bool BottomUpNestingDetected = false;
+  SmallVector<BasicBlock *, 8> PostOrder;
+  for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+    BasicBlock *BB = *I;
+    PostOrder.push_back(BB); // Saved for the top-down pass below.
+
+    BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+  }
+
+  // Iterate through the post-order in reverse order, achieving a
+  // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
+  // class here because it works by computing its own full postorder iteration,
+  // recording the sequence, and playing it back in reverse. Since we're already
+  // doing a full iteration above, we can just record the sequence manually and
+  // avoid the cost of having ReversePostOrderTraversal compute it.
+  bool TopDownNestingDetected = false;
+  for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
+       RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
+    TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+
+  return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+                           RRInfo &RetainsToMove,
+                           RRInfo &ReleasesToMove,
+                           MapVector<Value *, RRInfo> &Retains,
+                           DenseMap<Value *, RRInfo> &Releases,
+                           SmallVectorImpl<Instruction *> &DeadInsts) {
+  const Type *ArgTy = Arg->getType();
+  const Type *ParamTy = // Taken from whichever retain callee is non-null.
+    (RetainRVFunc ? RetainRVFunc :
+     RetainFunc ? RetainFunc :
+     RetainBlockFunc)->arg_begin()->getType();
+
+  // New retains go at ReleasesToMove's points; releases at RetainsToMove's.
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       PI = ReleasesToMove.ReverseInsertPts.begin(),
+       PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call =
+      CallInst::Create(RetainsToMove.IsRetainBlock ?
+                         RetainBlockFunc : RetainFunc,
+                       MyArg, "", InsertPt);
+    Call->setDoesNotThrow();
+    if (!RetainsToMove.IsRetainBlock)
+      Call->setTailCall();
+  }
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       PI = RetainsToMove.ReverseInsertPts.begin(),
+       PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *LastUse = *PI;
+    Instruction *InsertPts[] = { 0, 0, 0 }; // Null-terminated list.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
+      // We can't insert code immediately after an invoke instruction, so
+      // insert code at the beginning of both successor blocks instead.
+      // The invoke's return value isn't available in the unwind block,
+      // but our releases will never depend on it, because they must be
+      // paired with retains from before the invoke.
+      InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
+      InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+    } else {
+      // Insert code immediately after the last use.
+      InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
+    }
+
+    for (Instruction **I = InsertPts; *I; ++I) {
+      Instruction *InsertPt = *I;
+      Value *MyArg = ArgTy == ParamTy ? Arg :
+                     new BitCastInst(Arg, ParamTy, "", InsertPt);
+      CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+      // Attach a clang.imprecise_release metadata tag, if appropriate.
+      if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+        Call->setMetadata(ImpreciseReleaseMDKind, M);
+      Call->setDoesNotThrow();
+      if (ReleasesToMove.IsTailCallRelease)
+        Call->setTailCall();
+    }
+  }
+
+  // Queue the original retain and release calls for deletion by the caller.
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       AI = RetainsToMove.Calls.begin(),
+       AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRetain = *AI;
+    Retains.blot(OrigRetain);
+    DeadInsts.push_back(OrigRetain);
+  }
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       AI = ReleasesToMove.Calls.begin(),
+       AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRelease = *AI;
+    Releases.erase(OrigRelease);
+    DeadInsts.push_back(OrigRelease);
+  }
+}
+
+/// PerformCodePlacement - Walk every retain recorded in Retains, gather the
+/// set of retains and releases linked to it through the RRInfo Calls sets,
+/// and hand that set to MoveCalls when its path counts balance.
+///
+/// \param BBStates  per-block state; only GetAllPathCount() is queried here.
+/// \param Retains   retain calls and their RRInfo; entries with a null key
+///                  have been blotted and are skipped.
+/// \param Releases  release calls and their RRInfo.
+///
+/// Returns the last value assigned to AnyPairsCompletelyEliminated, i.e.
+/// whether the most recently transformed set needed no new insertion points.
+/// NOTE(review): the flag is assigned with '=' rather than '|=', so an earlier
+/// complete elimination can be overwritten by a later set - confirm whether
+/// that is intended.
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+                                   &BBStates,
+                                 MapVector<Value *, RRInfo> &Retains,
+                                 DenseMap<Value *, RRInfo> &Releases) {
+  bool AnyPairsCompletelyEliminated = false;
+  RRInfo RetainsToMove;
+  RRInfo ReleasesToMove;
+  SmallVector<Instruction *, 4> NewRetains;
+  SmallVector<Instruction *, 4> NewReleases;
+  SmallVector<Instruction *, 8> DeadInsts;
+
+  for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+       E = Retains.end(); I != E; ) {
+    Value *V = (I++)->first;
+    if (!V) continue; // blotted
+
+    Instruction *Retain = cast<Instruction>(V);
+    Value *Arg = GetObjCArg(Retain);
+
+    // If the object being released is in static or stack storage, we know it's
+    // not being managed by ObjC reference counting, so we can delete pairs
+    // regardless of what possible decrements or uses lie between them.
+    bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+    // If a pair happens in a region where it is known that the reference count
+    // is already incremented, we can similarly ignore possible decrements.
+    bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+
+    // Connect the dots between the top-down-collected RetainsToMove and
+    // bottom-up-collected ReleasesToMove to form sets of related calls.
+    // This is an iterative process so that we connect multiple releases
+    // to multiple retains if needed.
+    //
+    // OldDelta/NewDelta accumulate retain path counts minus release path
+    // counts for the original calls and the new insertion points; both must
+    // end at zero for the transformation to go ahead. The unsigned
+    // wrap-around on subtraction is harmless because only "== 0" is tested.
+    unsigned OldDelta = 0;
+    unsigned NewDelta = 0;
+    unsigned OldCount = 0;
+    unsigned NewCount = 0;
+    bool FirstRelease = true;
+    bool FirstRetain = true;
+    NewRetains.push_back(Retain);
+    for (;;) {
+      for (SmallVectorImpl<Instruction *>::const_iterator
+           NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+        Instruction *NewRetain = *NI;
+        MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+        assert(It != Retains.end());
+        const RRInfo &NewRetainRRI = It->second;
+        KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+        for (SmallPtrSet<Instruction *, 2>::const_iterator
+             LI = NewRetainRRI.Calls.begin(),
+             LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+          Instruction *NewRetainRelease = *LI;
+          DenseMap<Value *, RRInfo>::const_iterator Jt =
+            Releases.find(NewRetainRelease);
+          // A release we have no record of: give up on this retain.
+          if (Jt == Releases.end())
+            goto next_retain;
+          const RRInfo &NewRetainReleaseRRI = Jt->second;
+          assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+          if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+            OldDelta -=
+              BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+            // Merge the ReleaseMetadata and IsTailCallRelease values.
+            if (FirstRelease) {
+              ReleasesToMove.ReleaseMetadata =
+                NewRetainReleaseRRI.ReleaseMetadata;
+              ReleasesToMove.IsTailCallRelease =
+                NewRetainReleaseRRI.IsTailCallRelease;
+              FirstRelease = false;
+            } else {
+              if (ReleasesToMove.ReleaseMetadata !=
+                    NewRetainReleaseRRI.ReleaseMetadata)
+                ReleasesToMove.ReleaseMetadata = 0;
+              if (ReleasesToMove.IsTailCallRelease !=
+                    NewRetainReleaseRRI.IsTailCallRelease)
+                ReleasesToMove.IsTailCallRelease = false;
+            }
+
+            // Collect the optimal insertion points.
+            if (!KnownSafe)
+              for (SmallPtrSet<Instruction *, 2>::const_iterator
+                   RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+                   RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+                   RI != RE; ++RI) {
+                Instruction *RIP = *RI;
+                if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+                  NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+              }
+            NewReleases.push_back(NewRetainRelease);
+          }
+        }
+      }
+      NewRetains.clear();
+      if (NewReleases.empty()) break;
+
+      // Back the other way.
+      for (SmallVectorImpl<Instruction *>::const_iterator
+           NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+        Instruction *NewRelease = *NI;
+        DenseMap<Value *, RRInfo>::const_iterator It =
+          Releases.find(NewRelease);
+        assert(It != Releases.end());
+        const RRInfo &NewReleaseRRI = It->second;
+        KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+        for (SmallPtrSet<Instruction *, 2>::const_iterator
+             LI = NewReleaseRRI.Calls.begin(),
+             LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+          Instruction *NewReleaseRetain = *LI;
+          MapVector<Value *, RRInfo>::const_iterator Jt =
+            Retains.find(NewReleaseRetain);
+          // A retain we have no record of: give up on this set.
+          if (Jt == Retains.end())
+            goto next_retain;
+          const RRInfo &NewReleaseRetainRRI = Jt->second;
+          assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+          if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+            unsigned PathCount =
+              BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+            OldDelta += PathCount;
+            OldCount += PathCount;
+
+            // Merge the IsRetainBlock values. The value recorded for the
+            // first retain lives in RetainsToMove, so that is what every
+            // later retain has to be compared against.
+            if (FirstRetain) {
+              RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+              FirstRetain = false;
+            } else if (RetainsToMove.IsRetainBlock !=
+                       NewReleaseRetainRRI.IsRetainBlock)
+              // It's not possible to merge the sequences if one uses
+              // objc_retain and the other uses objc_retainBlock.
+              goto next_retain;
+
+            // Collect the optimal insertion points.
+            if (!KnownSafe)
+              for (SmallPtrSet<Instruction *, 2>::const_iterator
+                   RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+                   RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+                   RI != RE; ++RI) {
+                Instruction *RIP = *RI;
+                if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+                  PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+                  NewDelta += PathCount;
+                  NewCount += PathCount;
+                }
+              }
+            NewRetains.push_back(NewReleaseRetain);
+          }
+        }
+      }
+      NewReleases.clear();
+      if (NewRetains.empty()) break;
+    }
+
+    // If the pointer is known incremented, we can safely delete the pair
+    // regardless of what's between them.
+    if (KnownIncrementedTD || KnownIncrementedBU) {
+      RetainsToMove.ReverseInsertPts.clear();
+      ReleasesToMove.ReverseInsertPts.clear();
+      NewCount = 0;
+    }
+
+    // Determine whether the original call points are balanced in the retain and
+    // release calls through the program. If not, conservatively don't touch
+    // them.
+    // TODO: It's theoretically possible to do code motion in this case, as
+    // long as the existing imbalances are maintained.
+    if (OldDelta != 0)
+      goto next_retain;
+
+    // Determine whether the new insertion points we computed preserve the
+    // balance of retain and release calls through the program.
+    // TODO: If the fully aggressive solution isn't valid, try to find a
+    // less aggressive solution which is.
+    if (NewDelta != 0)
+      goto next_retain;
+
+    // Ok, everything checks out and we're all set. Let's move some code!
+    Changed = true;
+    AnyPairsCompletelyEliminated = NewCount == 0;
+    NumRRs += OldCount - NewCount;
+    MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+
+  next_retain:
+    // Reset the scratch state before looking at the next retain.
+    NewReleases.clear();
+    NewRetains.clear();
+    RetainsToMove.clear();
+    ReleasesToMove.clear();
+  }
+
+  // Now that we're done moving everything, we can delete the newly dead
+  // instructions, as we no longer need them as insert points.
+  while (!DeadInsts.empty())
+    EraseInstruction(DeadInsts.pop_back_val());
+
+  return AnyPairsCompletelyEliminated;
+}
+
+/// OptimizeWeakCalls - Weak pointer optimizations.
+///
+/// Two phases are run over F:
+///  1. For each objc_loadWeak / objc_loadWeakRetained call, scan backwards
+///     within its basic block for an earlier weak load from, or weak store
+///     to, a must-aliasing pointer and reuse that value instead.
+///  2. For each objc_destroyWeak whose operand is an alloca used only by
+///     initWeak/storeWeak/destroyWeak calls, delete those calls and the
+///     alloca.
+/// Sets Changed whenever the IR is modified.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+  // First, do memdep-style RLE and S2L optimizations. We can't use memdep
+  // itself because it uses AliasAnalysis and we need to do provenance
+  // queries instead.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    // Advance first: Inst itself may be erased below.
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+      continue;
+
+    // Delete objc_loadWeak calls with no users.
+    if (Class == IC_LoadWeak && Inst->use_empty()) {
+      Inst->eraseFromParent();
+      continue;
+    }
+
+    // TODO: For now, just look for an earlier available version of this value
+    // within the same block. Theoretically, we could do memdep-style non-local
+    // analysis too, but that would want caching. A better approach would be to
+    // use the technique that EarlyCSE uses.
+    inst_iterator Current = llvm::prior(I);
+    BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+    // Walk backwards from Inst to the start of its block.
+    for (BasicBlock::iterator B = CurrentBB->begin(),
+                              J = Current.getInstructionIterator();
+         J != B; --J) {
+      Instruction *EarlierInst = &*llvm::prior(J);
+      InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+      switch (EarlierClass) {
+      case IC_LoadWeak:
+      case IC_LoadWeakRetained: {
+        // If this is loading from the same pointer, replace this load's value
+        // with that one.
+        CallInst *Call = cast<CallInst>(Inst);
+        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall);
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_StoreWeak:
+      case IC_InitWeak: {
+        // If this is storing to the same pointer and has the same size etc.
+        // replace this load's value with the stored value.
+        CallInst *Call = cast<CallInst>(Inst);
+        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_MoveWeak:
+      case IC_CopyWeak:
+        // TODO: Grab the copied value.
+        goto clobbered;
+      case IC_AutoreleasepoolPush:
+      case IC_None:
+      case IC_User:
+        // Weak pointers are only modified through the weak entry points
+        // (and arbitrary calls, which could call the weak entry points).
+        break;
+      default:
+        // Anything else could modify the weak pointer.
+        goto clobbered;
+      }
+    }
+  clobbered:;
+  }
+
+  // Then, for each destroyWeak with an alloca operand, check to see if
+  // the alloca and all its users can be zapped.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    if (Class != IC_DestroyWeak)
+      continue;
+
+    CallInst *Call = cast<CallInst>(Inst);
+    Value *Arg = Call->getArgOperand(0);
+    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+      // Bail out unless every user is an initWeak/storeWeak/destroyWeak call.
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ++UI) {
+        Instruction *UserInst = cast<Instruction>(*UI);
+        switch (GetBasicInstructionClass(UserInst)) {
+        case IC_InitWeak:
+        case IC_StoreWeak:
+        case IC_DestroyWeak:
+          continue;
+        default:
+          goto done;
+        }
+      }
+      Changed = true;
+      // NOTE(review): I was advanced past Inst above; if the instruction it
+      // now refers to is another user of this alloca, it is erased here and
+      // I would dangle - confirm this cannot happen in practice.
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ) {
+        // Advance before erasing, since erasing unlinks the use.
+        CallInst *UserInst = cast<CallInst>(*UI++);
+        // A user that still has uses forwards them to its second call
+        // argument; use the argument accessor, as the rest of this
+        // function does, rather than a raw operand index.
+        if (!UserInst->use_empty())
+          UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+        UserInst->eraseFromParent();
+      }
+      Alloca->eraseFromParent();
+    done:;
+    }
+  }
+}
+
+/// OptimizeSequences - Run the flow analysis (Visit) over F and then let
+/// PerformCodePlacement transform the retain/release sequences it found.
+///
+/// Returns true only when Visit reported nesting and PerformCodePlacement
+/// returned true.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+  // Results of the main flow analysis. The keys are Value* rather than
+  // Instruction* so the maps remain valid once code is rewritten and calls
+  // are replaced by their arguments.
+  DenseMap<Value *, RRInfo> Releases;
+  MapVector<Value *, RRInfo> Retains;
+
+  // State tracked for each identified object at each block during the
+  // traversal of the function.
+  DenseMap<const BasicBlock *, BBState> BBStates;
+
+  // Step 1: analyze the CFG of the function, and all instructions.
+  const bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+  // Step 2: transform. This always runs, whatever step 1 reported.
+  const bool PlacementResult =
+    PerformCodePlacement(BBStates, Retains, Releases);
+
+  return PlacementResult && NestingDetected;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+///    %call = call i8* @something(...)
+///    %2 = call i8* @objc_retain(i8* %call)
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// If the retain and autorelease are found but the rest of the pattern does
+/// not match, a plain objc_autorelease is still converted to autoreleaseRV,
+/// since it is returning the value.
+///
+/// Does nothing for functions that do not return a pointer. Sets Changed
+/// whenever the IR is modified.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+  if (!F.getReturnType()->isPointerTy())
+    return;
+
+  // Scratch sets reused for every FindDependencies query; they are emptied
+  // again at next_block before the following block is examined.
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock *BB = FI;
+    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+    if (!Ret) continue;
+
+    const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+    FindDependencies(NeedsPositiveRetainCount, Arg,
+                     BB, Ret, DependingInstructions, Visited, PA);
+    if (DependingInstructions.size() != 1)
+      goto next_block;
+
+    {
+      CallInst *Autorelease =
+        dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+      if (!Autorelease)
+        goto next_block;
+      InstructionClass AutoreleaseClass =
+        GetBasicInstructionClass(Autorelease);
+      if (!IsAutorelease(AutoreleaseClass))
+        goto next_block;
+      if (GetObjCArg(Autorelease) != Arg)
+        goto next_block;
+
+      DependingInstructions.clear();
+      Visited.clear();
+
+      // Check that there is nothing that can affect the reference
+      // count between the autorelease and the retain.
+      FindDependencies(CanChangeRetainCount, Arg,
+                       BB, Autorelease, DependingInstructions, Visited, PA);
+      if (DependingInstructions.size() != 1)
+        goto next_block;
+
+      {
+        CallInst *Retain =
+          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+        // Check that we found a retain with the same argument.
+        if (!Retain ||
+            !IsRetain(GetBasicInstructionClass(Retain)) ||
+            GetObjCArg(Retain) != Arg)
+          goto next_block;
+
+        DependingInstructions.clear();
+        Visited.clear();
+
+        // Convert the autorelease to an autoreleaseRV, since it's
+        // returning the value.
+        if (AutoreleaseClass == IC_Autorelease) {
+          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+          AutoreleaseClass = IC_AutoreleaseRV;
+          // The IR was just modified; record it even if the remaining
+          // checks below fail and nothing gets deleted.
+          Changed = true;
+        }
+
+        // Check that there is nothing that can affect the reference
+        // count between the retain and the call.
+        FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() != 1)
+          goto next_block;
+
+        {
+          CallInst *Call =
+            dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+          // Check that the pointer is the return value of the call.
+          if (!Call || Arg != Call)
+            goto next_block;
+
+          // Check that the call is a regular call.
+          InstructionClass Class = GetBasicInstructionClass(Call);
+          if (Class != IC_CallOrUser && Class != IC_Call)
+            goto next_block;
+
+          // If so, we can zap the retain and autorelease.
+          Changed = true;
+          ++NumRets;
+          EraseInstruction(Retain);
+          EraseInstruction(Autorelease);
+        }
+      }
+    }
+
+  next_block:
+    DependingInstructions.clear();
+    Visited.clear();
+  }
+}
+
+/// doInitialization - Per-module setup: decide whether the pass should run
+/// on this module and, if so, cache the metadata kind and the runtime
+/// function declarations it looks at. Always returns false.
+bool ObjCARCOpt::doInitialization(Module &M) {
+  // With the optimizations disabled nothing is touched, not even Run.
+  if (!EnableARCOpts)
+    return false;
+
+  Run = ModuleHasARC(M);
+  if (Run) {
+    // The callee declarations are created lazily, so start them out null.
+    RetainCallee = 0;
+    ReleaseCallee = 0;
+    RetainRVCallee = 0;
+    AutoreleaseCallee = 0;
+    AutoreleaseRVCallee = 0;
+
+    // Look up the existing declarations for objc_retain and friends.
+    //
+    // Intuitively, objc_retain and others are nocapture, however in practice
+    // they are not, because they return their argument value. And
+    // objc_release calls finalizers.
+    RetainFunc = M.getFunction("objc_retain");
+    RetainBlockFunc = M.getFunction("objc_retainBlock");
+    RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+    ReleaseFunc = M.getFunction("objc_release");
+
+    // Identify the imprecise release metadata kind.
+    ImpreciseReleaseMDKind =
+      M.getContext().getMDKindID("clang.imprecise_release");
+  }
+
+  return false;
+}
+
+/// runOnFunction - Drive the individual ARC optimizations over F, skipping
+/// each one whose relevant instruction classes were not seen in the
+/// function. Returns the Changed flag.
+bool ObjCARCOpt::runOnFunction(Function &F) {
+  // Bail out if the optimizations are disabled or nothing in the Module
+  // uses ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  Changed = false;
+
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // This pass performs several distinct transformations. As a compile-time aid
+  // when compiling code that isn't ObjC, skip these if the relevant ObjC
+  // library functions aren't declared.
+  const int WeakMask = (1 << IC_LoadWeak) |
+                       (1 << IC_LoadWeakRetained) |
+                       (1 << IC_StoreWeak) |
+                       (1 << IC_InitWeak) |
+                       (1 << IC_CopyWeak) |
+                       (1 << IC_MoveWeak) |
+                       (1 << IC_DestroyWeak);
+  const int RetainMask = (1 << IC_Retain) |
+                         (1 << IC_RetainRV) |
+                         (1 << IC_RetainBlock);
+  const int ReleaseMask = 1 << IC_Release;
+  const int AutoreleaseMask = (1 << IC_Autorelease) | (1 << IC_AutoreleaseRV);
+
+  // Preliminary optimizations. This also computes UsedInThisFunction.
+  OptimizeIndividualCalls(F);
+
+  // Optimizations for weak pointers.
+  if (UsedInThisFunction & WeakMask)
+    OptimizeWeakCalls(F);
+
+  // Optimizations for retain+release pairs: these need at least one kind of
+  // retain and a release to be present.
+  if ((UsedInThisFunction & RetainMask) &&
+      (UsedInThisFunction & ReleaseMask)) {
+    // Run OptimizeSequences until it either stops making changes or
+    // no retain+release pair nesting is detected.
+    while (OptimizeSequences(F)) {}
+  }
+
+  // Optimizations if objc_autorelease is used.
+  if (UsedInThisFunction & AutoreleaseMask)
+    OptimizeReturns(F);
+
+  return Changed;
+}
+
+/// releaseMemory - Clear the state held by the provenance analysis helper.
+void ObjCARCOpt::releaseMemory() {
+  PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+// Incremented by ObjCARCContract::ContractRelease each time it forms an
+// objc_storeStrong call.
+STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+
+namespace {
+  /// ObjCARCContract - Late ARC optimizations.  The rewrites done here leave
+  /// the IR in a form that is difficult for ObjCARCOpt to analyze, which is
+  /// why this pass is run late.
+  class ObjCARCContract : public FunctionPass {
+    /// Changed - Whether the current function has been modified.
+    bool Changed;
+
+    /// AA, DT - Analyses fetched at the start of each runOnFunction.
+    AliasAnalysis *AA;
+    DominatorTree *DT;
+
+    /// PA - Provenance analysis helper passed to FindDependencies.
+    ProvenanceAnalysis PA;
+
+    /// Run - A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// StoreStrongCallee, etc. - Declarations for ObjC runtime
+    /// functions, for use in creating calls to them. These are initialized
+    /// lazily to avoid cluttering up the Module with unused declarations.
+    Constant *StoreStrongCallee;
+    Constant *RetainAutoreleaseCallee;
+    Constant *RetainAutoreleaseRVCallee;
+
+    /// RetainRVMarker - The inline asm string to insert between calls and
+    /// RetainRV calls to make the optimization work on targets which need it.
+    const MDString *RetainRVMarker;
+
+    // Lazy accessors for the runtime function declarations above.
+    Constant *getStoreStrongCallee(Module *M);
+    Constant *getRetainAutoreleaseCallee(Module *M);
+    Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+    // The individual contraction transformations.
+    bool ContractAutorelease(Function &F, Instruction *Autorelease,
+                             InstructionClass Class,
+                             SmallPtrSet<Instruction *, 4>
+                               &DependingInstructions,
+                             SmallPtrSet<const BasicBlock *, 4>
+                               &Visited);
+
+    void ContractRelease(Instruction *Release,
+                         inst_iterator &Iter);
+
+    // FunctionPass interface.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+  public:
+    static char ID;
+
+    /// The constructor also registers the pass with the global PassRegistry.
+    ObjCARCContract() : FunctionPass(ID) {
+      initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+    }
+  };
+}
+
+// Pass identification and registration: the pass is registered under the
+// command-line name "objc-arc-contract" and declares AliasAnalysis and
+// DominatorTree as its dependencies.
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+                      "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+                    "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+/// createObjCARCContractPass - Factory for the ARC contraction pass; returns
+/// a newly allocated ObjCARCContract.
+Pass *llvm::createObjCARCContractPass() {
+  Pass *NewPass = new ObjCARCContract();
+  return NewPass;
+}
+
+/// getAnalysisUsage - Declare the analyses this pass requires and what it
+/// preserves.
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+  // Required inputs.
+  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<DominatorTree>();
+
+  // The pass declares that it leaves the CFG intact.
+  AU.setPreservesCFG();
+}
+
+/// getStoreStrongCallee - Return the declaration of objc_storeStrong in M,
+/// creating it (as void(i8**, i8*)) on first use and caching the result in
+/// StoreStrongCallee.
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+  if (!StoreStrongCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    Type *I8XX = PointerType::getUnqual(I8X);
+    std::vector<Type *> Params;
+    Params.push_back(I8XX);
+    Params.push_back(I8X);
+
+    // Attribute lists are immutable: addAttr returns the extended list, so
+    // its result has to be assigned back or the attribute is silently lost.
+    AttrListPtr Attributes;
+    Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+    Attributes = Attributes.addAttr(1, Attribute::NoCapture);
+
+    StoreStrongCallee =
+      M->getOrInsertFunction(
+        "objc_storeStrong",
+        FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+        Attributes);
+  }
+  return StoreStrongCallee;
+}
+
+/// getRetainAutoreleaseCallee - Return the declaration of
+/// objc_retainAutorelease in M, creating it (as i8*(i8*)) on first use and
+/// caching the result in RetainAutoreleaseCallee.
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+  if (!RetainAutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr returns the extended list, so
+    // its result has to be assigned back or the attribute is silently lost.
+    AttrListPtr Attributes;
+    Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+    RetainAutoreleaseCallee =
+      M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
+  }
+  return RetainAutoreleaseCallee;
+}
+
+/// getRetainAutoreleaseRVCallee - Return the declaration of
+/// objc_retainAutoreleaseReturnValue in M, creating it (as i8*(i8*)) on first
+/// use and caching the result in RetainAutoreleaseRVCallee.
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+  if (!RetainAutoreleaseRVCallee) {
+    LLVMContext &C = M->getContext();
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
+    Params.push_back(I8X);
+    const FunctionType *FTy =
+      FunctionType::get(I8X, Params, /*isVarArg=*/false);
+    // Attribute lists are immutable: addAttr returns the extended list, so
+    // its result has to be assigned back or the attribute is silently lost.
+    AttrListPtr Attributes;
+    Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+    RetainAutoreleaseRVCallee =
+      M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+                             Attributes);
+  }
+  return RetainAutoreleaseRVCallee;
+}
+
+/// ContractAutorelease - Try to fuse Autorelease with the objc_retain it
+/// depends on: the retain's callee is switched to the fused
+/// retain+autorelease entry point and the autorelease is erased.
+///
+/// Class selects between the plain and the return-value flavour.
+/// DependingInstructions and Visited are caller-owned scratch sets; both are
+/// left empty on return. Returns true if the fusion was performed.
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+                                     InstructionClass Class,
+                                     SmallPtrSet<Instruction *, 4>
+                                       &DependingInstructions,
+                                     SmallPtrSet<const BasicBlock *, 4>
+                                       &Visited) {
+  const bool IsRV = Class == IC_AutoreleaseRV;
+  const Value *Arg = GetObjCArg(Autorelease);
+  BasicBlock *StartBB = Autorelease->getParent();
+
+  // Check that there are no instructions between the retain and the autorelease
+  // (such as an autorelease_pop) which may change the count.
+  if (IsRV)
+    FindDependencies(RetainAutoreleaseRVDep, Arg, StartBB, Autorelease,
+                     DependingInstructions, Visited, PA);
+  else
+    FindDependencies(RetainAutoreleaseDep, Arg, StartBB, Autorelease,
+                     DependingInstructions, Visited, PA);
+  Visited.clear();
+
+  // Only a single dependency is acceptable. Pull it out (if any) and hand
+  // the scratch set back empty either way.
+  CallInst *Retain = 0;
+  const bool HasSingleDep = DependingInstructions.size() == 1;
+  if (HasSingleDep)
+    Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+  DependingInstructions.clear();
+  if (!HasSingleDep)
+    return false;
+
+  // It has to be a plain objc_retain of the very same object.
+  if (!Retain)
+    return false;
+  if (GetBasicInstructionClass(Retain) != IC_Retain)
+    return false;
+  if (GetObjCArg(Retain) != Arg)
+    return false;
+
+  Changed = true;
+  ++NumPeeps;
+
+  // Retarget the retain at the fused runtime entry point.
+  Constant *FusedCallee = IsRV ? getRetainAutoreleaseRVCallee(F.getParent())
+                               : getRetainAutoreleaseCallee(F.getParent());
+  Retain->setCalledFunction(FusedCallee);
+
+  EraseInstruction(Autorelease);
+  return true;
+}
+
+/// ContractRelease - Attempt to merge an objc_release with a store, load, and
+/// objc_retain to form an objc_storeStrong. This can be a little tricky because
+/// the instructions don't always appear in order, and there may be unrelated
+/// intervening instructions.
+///
+/// \param Release  the objc_release call to start from.
+/// \param Iter     the caller's instruction iterator; it is advanced past
+///                 any instruction erased here that it currently refers to.
+void ObjCARCContract::ContractRelease(Instruction *Release,
+                                      inst_iterator &Iter) {
+  LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+  if (!Load || Load->isVolatile()) return;
+
+  // For now, require everything to be in one basic block.
+  BasicBlock *BB = Release->getParent();
+  if (Load->getParent() != BB) return;
+
+  // Walk down to find the store: skip the release itself, any retains, and
+  // anything that does not modify the loaded location.
+  BasicBlock::iterator I = Load, End = BB->end();
+  ++I;
+  AliasAnalysis::Location Loc = AA->getLocation(Load);
+  while (I != End &&
+         (&*I == Release ||
+          IsRetain(GetBasicInstructionClass(I)) ||
+          !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
+    ++I;
+  // The walk can run off the end of the block without finding anything that
+  // modifies the location; the end iterator must not be inspected.
+  if (I == End) return;
+  StoreInst *Store = dyn_cast<StoreInst>(I);
+  if (!Store || Store->isVolatile()) return;
+  if (Store->getPointerOperand() != Loc.Ptr) return;
+
+  Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+  // Walk up to find the retain.
+  I = Store;
+  BasicBlock::iterator Begin = BB->begin();
+  while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+    --I;
+  Instruction *Retain = I;
+  if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+  if (GetObjCArg(Retain) != New) return;
+
+  Changed = true;
+  ++NumStoreStrongs;
+
+  LLVMContext &C = Release->getContext();
+  const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+  const Type *I8XX = PointerType::getUnqual(I8X);
+
+  // objc_storeStrong takes (i8**, i8*); insert casts where the types differ.
+  Value *Args[] = { Load->getPointerOperand(), New };
+  if (Args[0]->getType() != I8XX)
+    Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+  if (Args[1]->getType() != I8X)
+    Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+  CallInst *StoreStrong =
+    CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+                     Args, "", Store);
+  StoreStrong->setDoesNotThrow();
+  StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+  // Keep the caller's iterator valid: both the store and the retain are
+  // about to be erased, and either may be the instruction Iter refers to
+  // (the retain when it sits directly after the release, ahead of the store).
+  while (&*Iter == Store || &*Iter == Retain) ++Iter;
+  Store->eraseFromParent();
+  Release->eraseFromParent();
+  EraseInstruction(Retain);
+  if (Load->use_empty())
+    Load->eraseFromParent();
+}
+
+/// doInitialization - Per-module setup: decide whether the pass should run,
+/// reset the lazily created callee declarations, and pick up the RetainRV
+/// marker string from module metadata if one is present. Always returns
+/// false.
+bool ObjCARCContract::doInitialization(Module &M) {
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
+  // These are initialized lazily.
+  RetainAutoreleaseRVCallee = 0;
+  RetainAutoreleaseCallee = 0;
+  StoreStrongCallee = 0;
+
+  // Initialize RetainRVMarker. It is only set when the named metadata has
+  // exactly one operand, which in turn has exactly one operand that is an
+  // MDString; otherwise it stays null.
+  RetainRVMarker = 0;
+  NamedMDNode *NMD =
+    M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
+  if (NMD && NMD->getNumOperands() == 1) {
+    const MDNode *N = NMD->getOperand(0);
+    if (N->getNumOperands() == 1)
+      RetainRVMarker = dyn_cast<MDString>(N->getOperand(0));
+  }
+
+  return false;
+}
+
+/// runOnFunction - Walk every instruction in F once, dispatching on its basic
+/// ARC instruction class.  For the classes that return their argument, also
+/// redirect uses of the argument that the call dominates to the call's result.
+bool ObjCARCContract::runOnFunction(Function &F) {
+  // Bail out if ARC opts are disabled or nothing in the Module uses ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  Changed = false;
+  AA = &getAnalysis<AliasAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // For ObjC library calls which return their argument, replace uses of the
+  // argument with uses of the call return value, if it dominates the use. This
+  // reduces register pressure.
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+
+    // Only these library routines return their argument. In particular,
+    // objc_retainBlock does not necessarily return its argument.
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    switch (Class) {
+    case IC_Retain:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV:
+      break;
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+      if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+        continue;
+      break;
+    case IC_RetainRV: {
+      // If we're compiling for a target which needs a special inline-asm
+      // marker to do the retainAutoreleasedReturnValue optimization,
+      // insert it now.
+      if (!RetainRVMarker)
+        break;
+      // Back up to the closest preceding instruction that is not a no-op,
+      // stopping at the start of the block instead of decrementing past it.
+      BasicBlock::iterator BBI = Inst, Begin = Inst->getParent()->begin();
+      do {
+        if (BBI == Begin)
+          break;
+        --BBI;
+      } while (isNoopInstruction(BBI));
+      if (&*BBI == GetObjCArg(Inst)) {
+        Changed = true;
+        InlineAsm *IA =
+          InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+                                           /*isVarArg=*/false),
+                         RetainRVMarker->getString(),
+                         /*Constraints=*/"", /*hasSideEffects=*/true);
+        CallInst::Create(IA, "", Inst);
+      }
+      break;
+    }
+    case IC_InitWeak: {
+      // objc_initWeak(p, null) => *p = null
+      CallInst *CI = cast<CallInst>(Inst);
+      if (isNullOrUndef(CI->getArgOperand(1))) {
+        Value *Null =
+          ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+        Changed = true;
+        new StoreInst(Null, CI->getArgOperand(0), CI);
+        CI->replaceAllUsesWith(Null);
+        CI->eraseFromParent();
+      }
+      continue;
+    }
+    case IC_Release:
+      ContractRelease(Inst, I);
+      continue;
+    default:
+      continue;
+    }
+
+    // Don't use GetObjCArg because we don't want to look through bitcasts
+    // and such; to do the replacement, the argument must have type i8*.
+    const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+    for (;;) {
+      // If we're compiling bugpointed code, don't get in trouble.
+      if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+        break;
+      // Look through the uses of the pointer.
+      for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+           UI != UE; ) {
+        Use &U = UI.getUse();
+        unsigned OperandNo = UI.getOperandNo();
+        ++UI; // Increment UI now, because we may unlink its element.
+        if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
+          if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
+            Changed = true;
+            Instruction *Replacement = Inst;
+            const Type *UseTy = U.get()->getType();
+            if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+              // For PHI nodes, insert the bitcast in the predecessor block.
+              BasicBlock *BB = PHI->getIncomingBlock(
+                PHINode::getIncomingValueNumForOperand(OperandNo));
+              if (Replacement->getType() != UseTy)
+                Replacement = new BitCastInst(Replacement, UseTy, "",
+                                              &BB->back());
+              for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+                if (PHI->getIncomingBlock(i) == BB) {
+                  // Keep the UI iterator valid (it may already be at the end).
+                  unsigned OpNo = PHINode::getOperandNumForIncomingValue(i);
+                  if (UI != UE && &PHI->getOperandUse(OpNo) == &UI.getUse())
+                    ++UI;
+                  PHI->setIncomingValue(i, Replacement);
+                }
+            } else {
+              if (Replacement->getType() != UseTy)
+                Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+              U.set(Replacement);
+            }
+          }
+      }
+
+      // If Arg is a no-op cast of a pointer, strip one level and iterate.
+      if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+        Arg = BI->getOperand(0);
+      else if (isa<GEPOperator>(Arg) &&
+               cast<GEPOperator>(Arg)->hasAllZeroIndices())
+        Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+      else if (isa<GlobalAlias>(Arg) &&
+               !cast<GlobalAlias>(Arg)->mayBeOverridden())
+        Arg = cast<GlobalAlias>(Arg)->getAliasee();
+      else
+        break;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c1dfe154ae3f..e6341ae3071f 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
       // because we can percolate the negate out.  Watch for minint, which
       // cannot be positivified.
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
-        if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
+        if (CI->isNegative() && !CI->isMinValue(true)) {
           Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
           assert(!Duplicates.count(Factor) &&
                  "Shouldn't have two constant factors, missed a canonicalize");
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 32a050617432..302c287d3cbd 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,7 +48,12 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLoopUnswitchPass(Registry);
   initializeLoopIdiomRecognizePass(Registry);
   initializeLowerAtomicPass(Registry);
+  initializeLowerExpectIntrinsicPass(Registry);
   initializeMemCpyOptPass(Registry);
+  initializeObjCARCAliasAnalysisPass(Registry);
+  initializeObjCARCExpandPass(Registry);
+  initializeObjCARCContractPass(Registry);
+  initializeObjCARCOptPass(Registry);
   initializeReassociatePass(Registry);
   initializeRegToMemPass(Registry);
   initializeSCCPPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 8938b287a840..7d6349cf4e77 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -30,6 +30,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/Loads.h"
@@ -152,7 +153,8 @@ namespace {
     void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
 
-    static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
+    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
+        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
   };
   
   // SROA_DT - SROA that uses DominatorTree.
@@ -228,16 +230,30 @@ class ConvertToScalarInfo {
   /// which means that mem2reg can't promote it.
   bool IsNotTrivial;
 
+  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
+  /// computed based on the uses of the alloca rather than the LLVM type system.
+  enum {
+    Unknown,  // Initial state: no use seen so far has determined a kind.
+
+    // Accesses via GEPs that are consistent with element access of a vector
+    // type. This will not be converted into a vector unless there is a later
+    // access using an actual vector type.
+    ImplicitVector,
+
+    // Accesses via vector operations and GEPs that are consistent with the
+    // layout of a vector type.
+    Vector,
+
+    // An integer bag-of-bits with bitwise operations for insertion and
+    // extraction. Any combination of types can be converted into this kind
+    // of scalar.
+    Integer
+  } ScalarKind;
+
   /// VectorTy - This tracks the type that we should promote the vector to if
   /// it is possible to turn it into a vector.  This starts out null, and if it
   /// isn't possible to turn into a vector type, it gets set to VoidTy.
-  const Type *VectorTy;
-
-  /// HadAVector - True if there is at least one vector access to the alloca.
-  /// We don't want to turn random arrays into vectors and use vector element
-  /// insert/extract, but if there are element accesses to something that is
-  /// also declared as a vector, we do want to promote to a vector.
-  bool HadAVector;
+  const VectorType *VectorTy;
 
   /// HadNonMemTransferAccess - True if there is at least one access to the 
   /// alloca that is not a MemTransferInst.  We don't want to turn structs into
@@ -246,14 +262,14 @@ class ConvertToScalarInfo {
 
 public:
   explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
-    : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
-      HadAVector(false), HadNonMemTransferAccess(false) { }
+    : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
+      VectorTy(0), HadNonMemTransferAccess(false) { }
 
   AllocaInst *TryConvert(AllocaInst *AI);
 
 private:
   bool CanConvertToScalar(Value *V, uint64_t Offset);
-  void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+  void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
   bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
   void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
 
@@ -274,6 +290,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
     return 0;
 
+  // If an alloca has only memset / memcpy uses, it may still have an Unknown
+  // ScalarKind. Treat it as an Integer below.
+  if (ScalarKind == Unknown)
+    ScalarKind = Integer;
+
+  // FIXME: It should be possible to promote the vector type up to the alloca's
+  // size.
+  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+    ScalarKind = Integer;
+
   // If we were able to find a vector type that can handle this with
   // insert/extract elements, and if there was at least one use that had
   // a vector type, promote this to a vector.  We don't want to promote
@@ -281,14 +307,15 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   // we just get a lot of insert/extracts.  If at least one vector is
   // involved, then we probably really do have a union of vector/array.
   const Type *NewTy;
-  if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+  if (ScalarKind == Vector) {
+    assert(VectorTy && "Missing type for vector scalar.");
     DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
           << *VectorTy << '\n');
     NewTy = VectorTy;  // Use the vector type.
   } else {
     unsigned BitWidth = AllocaSize * 8;
-    if (!HadAVector && !HadNonMemTransferAccess &&
-        !TD.fitsInLegalInteger(BitWidth))
+    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
+        !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
       return 0;
 
     DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
@@ -300,8 +327,9 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
   return NewAI;
 }
 
-/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
-/// so far at the offset specified by Offset (which is specified in bytes).
+/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
+/// (VectorTy) so far at the offset specified by Offset (which is specified in
+/// bytes).
 ///
 /// There are three cases we handle here:
 ///   1) A union of vector types of the same size and potentially its elements.
@@ -316,11 +344,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
 ///      large) integer type with extract and insert operations where the loads
 ///      and stores would mutate the memory.  We mark this by setting VectorTy
 ///      to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
-                                      bool IsLoadOrStore) {
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+                                                    uint64_t Offset) {
   // If we already decided to turn this into a blob of integer memory, there is
   // nothing to be done.
-  if (VectorTy && VectorTy->isVoidTy())
+  if (ScalarKind == Integer)
     return;
 
   // If this could be contributing to a vector, analyze it.
@@ -336,7 +364,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
     // Full width accesses can be ignored, because they can always be turned
     // into bitcasts.
     unsigned EltSize = In->getPrimitiveSizeInBits()/8;
-    if (IsLoadOrStore && EltSize == AllocaSize)
+    if (EltSize == AllocaSize)
       return;
 
     // If we're accessing something that could be an element of a vector, see
@@ -345,11 +373,12 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
     if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
         (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
       if (!VectorTy) {
+        ScalarKind = ImplicitVector;
         VectorTy = VectorType::get(In, AllocaSize/EltSize);
         return;
       }
 
-      unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+      unsigned CurrentEltSize = VectorTy->getElementType()
                                 ->getPrimitiveSizeInBits()/8;
       if (EltSize == CurrentEltSize)
         return;
@@ -361,16 +390,13 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
 
   // Otherwise, we have a case that we can't handle with an optimized vector
   // form.  We can still turn this into a large integer.
-  VectorTy = Type::getVoidTy(In->getContext());
+  ScalarKind = Integer;
 }
 
-/// MergeInVectorType - Handles the vector case of MergeInType, returning true
-/// if the type was successfully merged and false otherwise.
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
 bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
                                             uint64_t Offset) {
-  // Remember if we saw a vector type.
-  HadAVector = true;
-
   // TODO: Support nonzero offsets?
   if (Offset != 0)
     return false;
@@ -382,19 +408,22 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
   // If this the first vector we see, remember the type so that we know the
   // element size.
   if (!VectorTy) {
+    ScalarKind = Vector;
     VectorTy = VInTy;
     return true;
   }
 
-  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  unsigned BitWidth = VectorTy->getBitWidth();
   unsigned InBitWidth = VInTy->getBitWidth();
 
   // Vectors of the same size can be converted using a simple bitcast.
-  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+    ScalarKind = Vector;
     return true;
+  }
 
-  const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
-  const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+  const Type *ElementTy = VectorTy->getElementType();
+  const Type *InElementTy = VInTy->getElementType();
 
   // Do not allow mixed integer and floating-point accesses from vectors of
   // different sizes.
@@ -429,6 +458,7 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
   }
 
   // Pick the largest of the two vector types.
+  ScalarKind = Vector;
   if (InBitWidth > BitWidth)
     VectorTy = VInTy;
 
@@ -456,7 +486,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (LI->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(LI->getType(), Offset, true);
+      MergeInTypeForLoadOrStore(LI->getType(), Offset);
       continue;
     }
 
@@ -467,7 +497,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
       if (SI->getOperand(0)->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(SI->getOperand(0)->getType(), Offset, true);
+      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
       continue;
     }
 
@@ -498,10 +528,22 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
     // If this is a constant sized memset of a constant value (e.g. 0) we can
     // handle it.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
-      // Store of constant value and constant size.
-      if (!isa<ConstantInt>(MSI->getValue()) ||
-          !isa<ConstantInt>(MSI->getLength()))
+      // Store of constant value.
+      if (!isa<ConstantInt>(MSI->getValue()))
+        return false;
+
+      // Store of constant size.
+      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+      if (!Len)
         return false;
+
+      // If the size differs from the alloca, we can only convert the alloca to
+      // an integer bag-of-bits.
+      // FIXME: This should handle all of the cases that are currently accepted
+      // as vector element insertions.
+      if (Len->getZExtValue() != AllocaSize || Offset != 0)
+        ScalarKind = Integer;
+
       IsNotTrivial = true;  // Can't be mem2reg'd.
       HadNonMemTransferAccess = true;
       continue;
@@ -1053,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) {
 namespace {
 class AllocaPromoter : public LoadAndStorePromoter {
   AllocaInst *AI;
+  DIBuilder *DIB;
+  SmallVector<DbgDeclareInst *, 4> DDIs;
+  SmallVector<DbgValueInst *, 4> DVIs;
 public:
   AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
-                 DbgDeclareInst *DD, DIBuilder *&DB)
-    : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {}
+                 DIBuilder *DB)
+    : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
   
   void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
     // Remember which alloca we're promoting (for isInstInList).
     this->AI = AI;
+    if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+      for (Value::use_iterator UI = DebugNode->use_begin(),
+             E = DebugNode->use_end(); UI != E; ++UI)
+        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+          DDIs.push_back(DDI);
+        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+          DVIs.push_back(DVI);
+
     LoadAndStorePromoter::run(Insts);
     AI->eraseFromParent();
+    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(), 
+           E = DDIs.end(); I != E; ++I) {
+      DbgDeclareInst *DDI = *I;
+      DDI->eraseFromParent();
+    }
+    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(), 
+           E = DVIs.end(); I != E; ++I) {
+      DbgValueInst *DVI = *I;
+      DVI->eraseFromParent();
+    }
   }
   
   virtual bool isInstInList(Instruction *I,
@@ -1071,6 +1134,45 @@ public:
       return LI->getOperand(0) == AI;
     return cast<StoreInst>(I)->getPointerOperand() == AI;
   }
+
+  /// updateDebugInfo - If Inst is a load or store, hand each dbg.declare in
+  /// DDIs to ConvertDebugDeclareToDebugValue and insert a new dbg.value before
+  /// Inst for each dbg.value in DVIs.  Any other instruction is ignored.
+  virtual void updateDebugInfo(Instruction *Inst) const {
+    // Classify Inst once instead of re-testing it for every debug intrinsic.
+    StoreInst *Store = dyn_cast<StoreInst>(Inst);
+    LoadInst *Load = Store ? 0 : dyn_cast<LoadInst>(Inst);
+    if (!Store && !Load)
+      return;
+
+    for (unsigned i = 0, e = DDIs.size(); i != e; ++i) {
+      if (Store)
+        ConvertDebugDeclareToDebugValue(DDIs[i], Store, *DIB);
+      else
+        ConvertDebugDeclareToDebugValue(DDIs[i], Load, *DIB);
+    }
+
+    for (unsigned i = 0, e = DVIs.size(); i != e; ++i) {
+      DbgValueInst *DVI = DVIs[i];
+      // A store is described by the stored value, a load by its operand.
+      Value *Described = Store ? Store->getOperand(0) : Load->getOperand(0);
+      if (Store) {
+        // If an argument is zero or sign extended then use the argument
+        // directly. The extension may be zapped by an optimization pass in
+        // future.
+        Argument *ExtendedArg = 0;
+        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(Described))
+          ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+        if (SExtInst *SExt = dyn_cast<SExtInst>(Described))
+          ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+        if (ExtendedArg)
+          Described = ExtendedArg;
+      }
+      Instruction *DbgVal = DIB->insertDbgValueIntrinsic(
+          Described, 0, DIVariable(DVI->getVariable()), Inst);
+      DbgVal->setDebugLoc(DVI->getDebugLoc());
+    }
+  }
 };
 } // end anon namespace
 
@@ -1262,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
         LoadInst *TrueLoad = 
           Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
         LoadInst *FalseLoad = 
-          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
         
         // Transfer alignment and TBAA info if present.
         TrueLoad->setAlignment(LI->getAlignment());
@@ -1340,10 +1442,9 @@ bool SROA::performPromotion(Function &F) {
     DT = &getAnalysis<DominatorTree>();
 
   BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
-
+  DIBuilder DIB(*F.getParent());
   bool Changed = false;
   SmallVector<Instruction*, 64> Insts;
-  DIBuilder *DIB = 0;
   while (1) {
     Allocas.clear();
 
@@ -1367,11 +1468,7 @@ bool SROA::performPromotion(Function &F) {
         for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
              UI != E; ++UI)
           Insts.push_back(cast<Instruction>(*UI));
-
-        DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
-        if (DDI && !DIB)
-          DIB = new DIBuilder(*AI->getParent()->getParent()->getParent());
-        AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts);
+        AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
         Insts.clear();
       }
     }
@@ -1379,10 +1476,6 @@ bool SROA::performPromotion(Function &F) {
     Changed = true;
   }
 
-  // FIXME: Is there a better way to handle the lazy initialization of DIB
-  // so that there doesn't need to be an explicit delete?
-  delete DIB;
-
   return Changed;
 }
 
@@ -1403,8 +1496,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
 
 
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the malloc/alloca instructions in the function, removing
-// them if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
 //
 bool SROA::performScalarRepl(Function &F) {
   std::vector<AllocaInst*> WorkList;
@@ -1438,12 +1531,15 @@ bool SROA::performScalarRepl(Function &F) {
     // the constant global instead.  This is commonly produced by the CFE by
     // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
     // is only subsequently read.
-    if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+    SmallVector<Instruction *, 4> ToDelete;
+    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
       DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
-      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
-      Constant *TheSrc = cast<Constant>(TheCopy->getSource());
+      DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
+      for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+        ToDelete[i]->eraseFromParent();
+      Constant *TheSrc = cast<Constant>(Copy->getSource());
       AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
-      TheCopy->eraseFromParent();  // Don't mutate the global.
+      Copy->eraseFromParent();  // Don't mutate the global.
       AI->eraseFromParent();
       ++NumGlobals;
       Changed = true;
@@ -2467,8 +2563,14 @@ static bool PointsToConstantGlobal(Value *V) {
 /// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
 /// the alloca, and if the source pointer is a pointer to a constant global, we
 /// can optimize this.
-static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
-                                           bool isOffset) {
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+                               bool isOffset,
+                               SmallVector<Instruction *, 4> &LifetimeMarkers) {
+  // We track lifetime intrinsics as we encounter them.  If we decide to go
+  // ahead and replace the value with the global, this lets the caller quickly
+  // eliminate the markers.
+
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
     User *U = cast<Instruction>(*UI);
 
@@ -2480,7 +2582,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
 
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       // If uses of the bitcast are ok, we are ok.
-      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+                                          LifetimeMarkers))
         return false;
       continue;
     }
@@ -2488,7 +2591,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
       // If the GEP has all zero indices, it doesn't offset the pointer.  If it
       // doesn't, it does.
       if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
-                                         isOffset || !GEP->hasAllZeroIndices()))
+                                          isOffset || !GEP->hasAllZeroIndices(),
+                                          LifetimeMarkers))
         return false;
       continue;
     }
@@ -2514,6 +2618,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
         continue;
     }
 
+    // Lifetime intrinsics can be handled by the caller.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+          II->getIntrinsicID() == Intrinsic::lifetime_end) {
+        assert(II->use_empty() && "Lifetime markers have no result to use!");
+        LifetimeMarkers.push_back(II);
+        continue;
+      }
+    }
+
     // If this is isn't our memcpy/memmove, reject it as something we can't
     // handle.
     MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
@@ -2550,9 +2664,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global.  If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+                                     SmallVector<Instruction*, 4> &ToDelete) {
   MemTransferInst *TheCopy = 0;
-  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
     return TheCopy;
   return 0;
 }
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7e9cc807b214..a66b3e38258f 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
 static void ChangeToCall(InvokeInst *II) {
   BasicBlock *BB = II->getParent();
   SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
-                                       Args.end(), "", II);
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b0348f14..7c415e5150dc 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
     }
 
     // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
-    const Type *ArgType = Op->getType();
+    Type *ArgType = Op->getType();
     Value *F = Intrinsic::getDeclaration(Callee->getParent(),
-                                         Intrinsic::cttz, &ArgType, 1);
+                                         Intrinsic::cttz, ArgType);
     Value *V = B.CreateCall(F, Op, "cttz");
     V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
     V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 92464e8cf130..b4f74f97e978 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -153,13 +153,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
   // Delete the unconditional branch from the predecessor...
   PredBB->getInstList().pop_back();
   
-  // Move all definitions in the successor to the predecessor...
-  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-  
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(PredBB);
   
+  // Move all definitions in the successor to the predecessor...
+  PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+  
   // Inherit predecessors name if it exists.
   if (!PredBB->hasName())
     PredBB->takeName(BB);
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index d6206a3f3326..92ce50030a5d 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -193,44 +193,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   
   // If there are any PHI nodes in DestBB, we need to update them so that they
   // merge incoming values from NewBB instead of from TIBB.
-  if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
-    // This conceptually does:
-    //  foreach (PHINode *PN in DestBB)
-    //    PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
-    // but is optimized for two cases.
-    
-    if (APHI->getNumIncomingValues() <= 8) {  // Small # preds case.
-      unsigned BBIdx = 0;
-      for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
-        // We no longer enter through TIBB, now we come in through NewBB.
-        // Revector exactly one entry in the PHI node that used to come from
-        // TIBB to come from NewBB.
-        PHINode *PN = cast<PHINode>(I);
-        
-        // Reuse the previous value of BBIdx if it lines up.  In cases where we
-        // have multiple phi nodes with *lots* of predecessors, this is a speed
-        // win because we don't have to scan the PHI looking for TIBB.  This
-        // happens because the BB list of PHI nodes are usually in the same
-        // order.
-        if (PN->getIncomingBlock(BBIdx) != TIBB)
-          BBIdx = PN->getBasicBlockIndex(TIBB);
-        PN->setIncomingBlock(BBIdx, NewBB);
-      }
-    } else {
-      // However, the foreach loop is slow for blocks with lots of predecessors
-      // because PHINode::getIncomingBlock is O(n) in # preds.  Instead, walk
-      // the user list of TIBB to find the PHI nodes.
-      SmallPtrSet<PHINode*, 16> UpdatedPHIs;
-    
-      for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
-           UI != E; ) {
-        Value::use_iterator Use = UI++;
-        if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
-          // Remove one entry from each PHI.
-          if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
-            PN->setOperand(Use.getOperandNo(), NewBB);
-        }
-      }
+  {
+    unsigned BBIdx = 0;
+    for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+      // We no longer enter through TIBB, now we come in through NewBB.
+      // Revector exactly one entry in the PHI node that used to come from
+      // TIBB to come from NewBB.
+      PHINode *PN = cast<PHINode>(I);
+
+      // Reuse the previous value of BBIdx if it lines up.  In cases where we
+      // have multiple phi nodes with *lots* of predecessors, this is a speed
+      // win because we don't have to scan the PHI looking for TIBB.  This
+      // happens because the BB list of PHI nodes are usually in the same
+      // order.
+      if (PN->getIncomingBlock(BBIdx) != TIBB)
+	BBIdx = PN->getBasicBlockIndex(TIBB);
+      PN->setIncomingBlock(BBIdx, NewBB);
     }
   }
    
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 5b76bb26e404..204c2c63e1a5 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,7 +5,6 @@ add_llvm_library(LLVMTransformUtils
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
   CloneFunction.cpp
-  CloneLoop.cpp
   CloneModule.cpp
   CodeExtractor.cpp
   DemoteRegToStack.cpp
@@ -15,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
   Local.cpp
   LoopSimplify.cpp
   LoopUnroll.cpp
+  LowerExpectIntrinsic.cpp
   LowerInvoke.cpp
   LowerSwitch.cpp
   Mem2Reg.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d967ceb96856..6ea831f5345b 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -140,7 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
                               bool ModuleLevelChanges,
                               ClonedCodeInfo *CodeInfo) {
-  std::vector<const Type*> ArgTypes;
+  std::vector<Type*> ArgTypes;
 
   // The user might be deleting arguments to the function by specifying them in
   // the VMap.  If so, we need to not add the arguments to the arg ty vector
@@ -342,18 +342,6 @@ ConstantFoldMappedInstruction(const Instruction *I) {
                                   Ops.size(), TD);
 }
 
-static DebugLoc
-UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
-                    LLVMContext &Ctx) {
-  DebugLoc NewLoc = TheCallDL;
-  if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
-    NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
-                                 Ctx);
-
-  return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
-                       InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
-}
-
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly.  The
 /// effect of this is to copy significantly less code in cases where (for
@@ -418,50 +406,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     if (PHINode *PN = dyn_cast<PHINode>(I)) {
       // Skip over all PHI nodes, remembering them for later.
       BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
-        if (I->hasMetadata()) {
-          if (!TheCallDL.isUnknown()) {
-            DebugLoc IDL = I->getDebugLoc();
-            if (!IDL.isUnknown()) {
-              DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
-                                                   I->getContext());
-              I->setDebugLoc(NewDL);
-            }
-          } else {
-            // The cloned instruction has dbg info but the call instruction
-            // does not have dbg info. Remove dbg info from cloned instruction.
-            I->setDebugLoc(DebugLoc());
-          }
-        }
+      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
         PHIToResolve.push_back(cast<PHINode>(OldI));
-      }
     }
     
-    // FIXME:
-    // FIXME:
-    // FIXME: Unclone all this metadata stuff.
-    // FIXME:
-    // FIXME:
-    
     // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I) {
-      if (I->hasMetadata()) {
-        if (!TheCallDL.isUnknown()) {
-          DebugLoc IDL = I->getDebugLoc();
-          if (!IDL.isUnknown()) {
-            DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
-                                                 I->getContext());
-            I->setDebugLoc(NewDL);
-          }
-        } else {
-          // The cloned instruction has dbg info but the call instruction
-          // does not have dbg info. Remove dbg info from cloned instruction.
-          I->setDebugLoc(DebugLoc());
-        }
-      }
+    for (; I != NewBB->end(); ++I)
       RemapInstruction(I, VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
-    }
   }
   
   // Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -572,12 +524,12 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     // removed, so we just need to splice the blocks.
     BI->eraseFromParent();
     
-    // Move all the instructions in the succ to the pred.
-    I->getInstList().splice(I->end(), Dest->getInstList());
-    
     // Make all PHI nodes that referred to Dest now refer to I as their source.
     Dest->replaceAllUsesWith(I);
 
+    // Move all the instructions in the succ to the pred.
+    I->getInstList().splice(I->end(), Dest->getInstList());
+    
     // Remove the dest block.
     Dest->eraseFromParent();
     
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
deleted file mode 100644
index 87dd14153a19..000000000000
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneLoop interface which makes a copy of a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
-
-
-using namespace llvm;
-
-/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
-/// that the basic block is already cloned.
-static void CloneDominatorInfo(BasicBlock *BB, 
-                               ValueToValueMapTy &VMap,
-                               DominatorTree *DT) {
-
-  assert (DT && "DominatorTree is not available");
-  ValueToValueMapTy::iterator BI = VMap.find(BB);
-  assert (BI != VMap.end() && "BasicBlock clone is missing");
-  BasicBlock *NewBB = cast<BasicBlock>(BI->second);
-
-  // NewBB already got dominator info.
-  if (DT->getNode(NewBB))
-    return;
-
-  assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
-  // Entry block is not expected here. Infinite loops are not to cloned.
-  assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
-  BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
-
-  // NewBB's dominator is either BB's dominator or BB's dominator's clone.
-  BasicBlock *NewBBDom = BBDom;
-  ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
-  if (BBDomI != VMap.end()) {
-    NewBBDom = cast<BasicBlock>(BBDomI->second);
-    if (!DT->getNode(NewBBDom))
-      CloneDominatorInfo(BBDom, VMap, DT);
-  }
-  DT->addNewBlock(NewBB, NewBBDom);
-}
-
-/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
-/// using old blocks to new blocks mapping.
-Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager  *LPM, LoopInfo *LI,
-                      ValueToValueMapTy &VMap, Pass *P) {
-  
-  DominatorTree *DT = NULL;
-  if (P)
-    DT = P->getAnalysisIfAvailable<DominatorTree>();
-
-  SmallVector<BasicBlock *, 16> NewBlocks;
-
-  // Populate loop nest.
-  SmallVector<Loop *, 8> LoopNest;
-  LoopNest.push_back(OrigL);
-
-
-  Loop *NewParentLoop = NULL;
-  do {
-    Loop *L = LoopNest.pop_back_val();
-    Loop *NewLoop = new Loop();
-
-    if (!NewParentLoop)
-      NewParentLoop = NewLoop;
-
-    LPM->insertLoop(NewLoop, L->getParentLoop());
-
-    // Clone Basic Blocks.
-    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-         I != E; ++I) {
-      BasicBlock *BB = *I;
-      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
-      VMap[BB] = NewBB;
-      if (P)
-        LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
-      NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
-      NewBlocks.push_back(NewBB);
-    }
-
-    // Clone dominator info.
-    if (DT)
-      for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-           I != E; ++I) {
-        BasicBlock *BB = *I;
-        CloneDominatorInfo(BB, VMap, DT);
-      }
-
-    // Process sub loops
-    for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-      LoopNest.push_back(*I);
-  } while (!LoopNest.empty());
-
-  // Remap instructions to reference operands from VMap.
-  for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(), 
-        NBE = NewBlocks.end();  NBItr != NBE; ++NBItr) {
-    BasicBlock *NB = *NBItr;
-    for(BasicBlock::iterator BI = NB->begin(), BE = NB->end(); 
-        BI != BE; ++BI) {
-      Instruction *Insn = BI;
-      for (unsigned index = 0, num_ops = Insn->getNumOperands(); 
-           index != num_ops; ++index) {
-        Value *Op = Insn->getOperand(index);
-        ValueToValueMapTy::iterator OpItr = VMap.find(Op);
-        if (OpItr != VMap.end())
-          Insn->setOperand(index, OpItr->second);
-      }
-    }
-  }
-
-  BasicBlock *Latch = OrigL->getLoopLatch();
-  Function *F = Latch->getParent();
-  F->getBasicBlockList().insert(OrigL->getHeader(), 
-                                NewBlocks.begin(), NewBlocks.end());
-
-
-  return NewParentLoop;
-}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1046c38ec01d..a08fa35065cc 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Module.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Constant.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 using namespace llvm;
@@ -32,20 +31,13 @@ Module *llvm::CloneModule(const Module *M) {
   return CloneModule(M, VMap);
 }
 
-Module *llvm::CloneModule(const Module *M,
-                          ValueToValueMapTy &VMap) {
-  // First off, we need to create the new module...
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+  // First off, we need to create the new module.
   Module *New = new Module(M->getModuleIdentifier(), M->getContext());
   New->setDataLayout(M->getDataLayout());
   New->setTargetTriple(M->getTargetTriple());
   New->setModuleInlineAsm(M->getModuleInlineAsm());
-
-  // Copy all of the type symbol table entries over.
-  const TypeSymbolTable &TST = M->getTypeSymbolTable();
-  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); 
-       TI != TE; ++TI)
-    New->addTypeName(TI->first, TI->second);
-  
+   
   // Copy all of the dependent libraries over.
   for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
     New->addLibrary(*I);
@@ -88,8 +80,7 @@ Module *llvm::CloneModule(const Module *M,
        I != E; ++I) {
     GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
-      GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 VMap, RF_None)));
+      GV->setInitializer(MapValue(I->getInitializer(), VMap));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -119,8 +110,8 @@ Module *llvm::CloneModule(const Module *M,
        I != E; ++I) {
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
-    if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
+    if (const Constant *C = I->getAliasee())
+      GA->setAliasee(MapValue(C, VMap));
   }
 
   // And named metadata....
@@ -129,8 +120,7 @@ Module *llvm::CloneModule(const Module *M,
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
     for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
-                                               RF_None)));
+      NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
   }
 
   return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 8c133ea7f560..081352358b95 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -258,7 +258,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
-  std::vector<const Type*> paramTy;
+  std::vector<Type*> paramTy;
 
   // Add the types of the input values to the function's argument list
   for (Values::const_iterator i = inputs.begin(),
@@ -279,7 +279,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   }
 
   DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
-  for (std::vector<const Type*>::iterator i = paramTy.begin(),
+  for (std::vector<Type*>::iterator i = paramTy.begin(),
          e = paramTy.end(); i != e; ++i)
     DEBUG(dbgs() << **i << ", ");
   DEBUG(dbgs() << ")\n");
@@ -403,7 +403,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
 
   AllocaInst *Struct = 0;
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    std::vector<const Type*> ArgTypes;
+    std::vector<Type*> ArgTypes;
     for (Values::iterator v = StructValues.begin(),
            ve = StructValues.end(); v != ve; ++v)
       ArgTypes.push_back((*v)->getType());
@@ -429,7 +429,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   }
 
   // Emit the call to the function
-  CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+  CallInst *call = CallInst::Create(newFunction, params,
                                     NumExitBlocks > 1 ? "targetBlock" : "");
   codeReplacer->getInstList().push_back(call);
 
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 8416170d9032..d5b382e55e5c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -449,11 +449,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
       for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
         NewSelector.push_back(Outer->getArgOperand(i));
 
-      CallInst *NewInner = CallInst::Create(Inner->getCalledValue(),
-                                            NewSelector.begin(),
-                                            NewSelector.end(),
-                                            "",
-                                            Inner);
+      CallInst *NewInner =
+        IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
       // No need to copy attributes, calling convention, etc.
       NewInner->takeName(Inner);
       Inner->replaceAllUsesWith(NewInner);
@@ -489,8 +486,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
     InvokeInst *II =
       InvokeInst::Create(CI->getCalledValue(), Split,
                          Invoke.getOuterUnwindDest(),
-                         InvokeArgs.begin(), InvokeArgs.end(),
-                         CI->getName(), BB);
+                         InvokeArgs, CI->getName(), BB);
     II->setCallingConv(CI->getCallingConv());
     II->setAttributes(CI->getAttributes());
     
@@ -664,7 +660,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   
   LLVMContext &Context = Arg->getContext();
 
-  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
   
   // Create the alloca.  If we have TargetData, use nice alignment.
   unsigned Align = 1;
@@ -681,10 +677,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
                                     &*Caller->begin()->begin());
   // Emit a memcpy.
-  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+  Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
   Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                  Intrinsic::memcpy, 
-                                                 Tys, 3);
+                                                 Tys);
   Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
   Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
   
@@ -703,7 +699,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
     ConstantInt::get(Type::getInt32Ty(Context), 1),
     ConstantInt::getFalse(Context) // isVolatile
   };
-  CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+  IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
   
   // Uses of the argument in the function should use our new alloca
   // instead.
@@ -734,17 +730,52 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
   if (AI->getType() == Int8PtrTy)
     return isUsedByLifetimeMarker(AI);
 
-  // Do a scan to find all the bitcasts to i8*.
+  // Do a scan to find all the casts to i8*.
   for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
        ++I) {
     if (I->getType() != Int8PtrTy) continue;
-    if (!isa<BitCastInst>(*I)) continue;
+    if (I->stripPointerCasts() != AI) continue;
     if (isUsedByLifetimeMarker(*I))
       return true;
   }
   return false;
 }
 
+/// updateInlinedAtInfo - Helper used by fixupLineNumbers.  Returns a copy of
+/// DL with InlinedAtDL appended at the end of its (recursive) inlined-at chain.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, 
+                                    const DebugLoc &InlinedAtDL,
+                                    LLVMContext &Ctx) {
+  if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+    DebugLoc NewInlinedAtDL 
+      = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+    return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                         NewInlinedAtDL.getAsMDNode(Ctx));
+  }
+  // DL has no inlined-at entry of its own: attach InlinedAtDL directly.
+  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                       InlinedAtDL.getAsMDNode(Ctx));
+}
+
+
+/// fixupLineNumbers - Update the debug locations of the instructions in blocks
+/// [FI, Fn->end()) to encode the location of TheCall, where they were inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+                              Instruction *TheCall) {
+  DebugLoc TheCallDL = TheCall->getDebugLoc();
+  if (TheCallDL.isUnknown())
+    return;
+  // Rewrite every known debug location from FI through the end of Fn.
+  for (; FI != Fn->end(); ++FI) {
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+         BI != BE; ++BI) {
+      DebugLoc DL = BI->getDebugLoc();
+      if (!DL.isUnknown())
+        BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+    }
+  }
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -847,6 +878,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     // Update the callgraph if requested.
     if (IFI.CG)
       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+    // Update inlined instructions' line number information.
+    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
   }
 
   // If there are any alloca instructions in the block that used to be the entry
@@ -920,13 +954,13 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
 
     // Insert the llvm.stacksave.
-    CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
-                                          FirstNewBlock->begin());
+    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+      .CreateCall(StackSave, "savedstack");
 
     // Insert a call to llvm.stackrestore before any return instructions in the
     // inlined function.
     for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
-      CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
+      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
     }
 
     // Count the number of StackRestore calls we insert.
@@ -938,7 +972,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
       for (Function::iterator BB = FirstNewBlock, E = Caller->end();
            BB != E; ++BB)
         if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-          CallInst::Create(StackRestore, SavedPtr, "", UI);
+          IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
           ++NumStackRestores;
         }
     }
@@ -1098,15 +1132,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
         TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
     }
 
+    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+    BasicBlock *ReturnBB = Returns[0]->getParent();
+    ReturnBB->replaceAllUsesWith(AfterCallBB);
+
     // Splice the code from the return block into the block that it will return
     // to, which contains the code that was after the call.
-    BasicBlock *ReturnBB = Returns[0]->getParent();
     AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                       ReturnBB->getInstList());
 
-    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
-    ReturnBB->replaceAllUsesWith(AfterCallBB);
-
     // Delete the return instruction now and empty ReturnBB now.
     Returns[0]->eraseFromParent();
     ReturnBB->eraseFromParent();
@@ -1126,8 +1160,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
 
   // Splice the code entry block into calling block, right before the
   // unconditional branch.
-  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
   CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
+  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
 
   // Remove the unconditional branch.
   OrigBB->getInstList().erase(Br);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 3bdbaa5c09db..0f6d9ae99d66 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -427,10 +427,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   BasicBlock *PredBB = DestBB->getSinglePredecessor();
   assert(PredBB && "Block doesn't have a single predecessor!");
   
-  // Splice all the instructions from PredBB to DestBB.
-  PredBB->getTerminator()->eraseFromParent();
-  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
-
   // Zap anything that took the address of DestBB.  Not doing this will give the
   // address an invalid value.
   if (DestBB->hasAddressTaken()) {
@@ -445,6 +441,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);
   
+  // Splice all the instructions from PredBB to DestBB.
+  PredBB->getTerminator()->eraseFromParent();
+  DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
   if (P) {
     DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
     if (DT) {
@@ -536,9 +536,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
 
 /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
 /// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential debug intrinsics and the branch.  If possible, eliminate BB by
-/// rewriting all the predecessors to branch to the successor block and return
-/// true.  If we can't transform, return false.
+/// potential side-effect free intrinsics and the branch.  If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true.  If we can't transform, return false.
 bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
   assert(BB != &BB->getParent()->getEntryBlock() &&
          "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -613,13 +613,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
     }
   }
   
-  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-    if (Succ->getSinglePredecessor()) {
-      // BB is the only predecessor of Succ, so Succ will end up with exactly
-      // the same predecessors BB had.
-      Succ->getInstList().splice(Succ->begin(),
-                                 BB->getInstList(), BB->begin());
-    } else {
+  if (Succ->getSinglePredecessor()) {
+    // BB is the only predecessor of Succ, so Succ will end up with exactly
+    // the same predecessors BB had.
+
+    // Copy over any phi, debug or lifetime instruction.
+    BB->getTerminator()->eraseFromParent();
+    Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+  } else {
+    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
       // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
       assert(PN->use_empty() && "There shouldn't be any uses here!");
       PN->eraseFromParent();
@@ -642,7 +644,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
   bool Changed = false;
 
   // This implementation doesn't currently consider undef operands
-  // specially. Theroetically, two phis which are identical except for
+  // specially. Theoretically, two phis which are identical except for
   // one having an undef where the other doesn't could be collapsed.
 
   // Map from PHI hash values to PHI nodes. If multiple PHIs have
@@ -660,12 +662,17 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
     // them, which helps expose duplicates, but we have to check all the
     // operands to be safe in case instcombine hasn't run.
     uintptr_t Hash = 0;
+    // This hash algorithm is quite weak as hash functions go, but it seems
+    // to do a good enough job for this particular purpose, and is very quick.
     for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
-      // This hash algorithm is quite weak as hash functions go, but it seems
-      // to do a good enough job for this particular purpose, and is very quick.
       Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
       Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
     }
+    for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+         I != E; ++I) {
+      Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+    }
     // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
     Hash >>= 1;
     // If we've never seen this hash value before, it's a unique PHI.
@@ -706,39 +713,15 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 ///
 static unsigned enforceKnownAlignment(Value *V, unsigned Align,
                                       unsigned PrefAlign) {
+  V = V->stripPointerCasts();
 
-  User *U = dyn_cast<User>(V);
-  if (!U) return Align;
-
-  switch (Operator::getOpcode(U)) {
-  default: break;
-  case Instruction::BitCast:
-    return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
-  case Instruction::GetElementPtr: {
-    // If all indexes are zero, it is just the alignment of the base pointer.
-    bool AllZeroOperands = true;
-    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
-      if (!isa<Constant>(*i) ||
-          !cast<Constant>(*i)->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-
-    if (AllZeroOperands) {
-      // Treat this like a bitcast.
-      return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
-    }
-    return Align;
-  }
-  case Instruction::Alloca: {
-    AllocaInst *AI = cast<AllocaInst>(V);
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
     // If there is a requested alignment and if this is an alloca, round up.
     if (AI->getAlignment() >= PrefAlign)
       return AI->getAlignment();
     AI->setAlignment(PrefAlign);
     return PrefAlign;
   }
-  }
 
   if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     // If there is a large requested alignment and we can, bump up the alignment
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f02ffd20bca9..e79fb5ac21b4 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -375,6 +375,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
     SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
                            ".preheader", this);
 
+  NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
   DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
                << "\n");
 
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 7da7271e642c..6772511b5d5a 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -47,6 +47,14 @@ static inline void RemapInstruction(Instruction *I,
     if (It != VMap.end())
       I->setOperand(op, It->second);
   }
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+      if (It != VMap.end())
+        PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+    }
+  }
 }
 
 /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
@@ -75,13 +83,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
   // Delete the unconditional branch from the predecessor...
   OnlyPred->getInstList().pop_back();
 
-  // Move all definitions in the successor to the predecessor...
-  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
   // Make all PHI nodes that referred to BB now refer to Pred as their
   // source...
   BB->replaceAllUsesWith(OnlyPred);
 
+  // Move all definitions in the successor to the predecessor...
+  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
   std::string OldName = BB->getName();
 
   // Erase basic block from the function...
@@ -247,16 +255,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
       // the successor of the latch block.  The successor of the exit block will
       // be updated specially after unrolling all the way.
       if (*BB != LatchBlock)
-        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
-             UI != UE;) {
-          Instruction *UseInst = cast<Instruction>(*UI);
-          ++UI;
-          if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
-            PHINode *phi = cast<PHINode>(UseInst);
-            Value *Incoming = phi->getIncomingValueForBlock(*BB);
-            phi->addIncoming(Incoming, New);
-          }
-        }
+        for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+             ++SI)
+          if (!L->contains(*SI))
+            for (BasicBlock::iterator BBI = (*SI)->begin();
+                 PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+              Value *Incoming = phi->getIncomingValueForBlock(*BB);
+              phi->addIncoming(Incoming, New);
+            }
 
       // Keep track of new headers and latches as we create them, so that
       // we can insert the proper branches later.
@@ -288,24 +294,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
   // successor blocks, update them to use the appropriate values computed as the
   // last iteration of the loop.
   if (Count != 1) {
-    SmallPtrSet<PHINode*, 8> Users;
-    for (Value::use_iterator UI = LatchBlock->use_begin(),
-         UE = LatchBlock->use_end(); UI != UE; ++UI)
-      if (PHINode *phi = dyn_cast<PHINode>(*UI))
-        Users.insert(phi);
-    
     BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
-    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+    for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
          SI != SE; ++SI) {
-      PHINode *PN = *SI;
-      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
-      // If this value was defined in the loop, take the value defined by the
-      // last iteration of the loop.
-      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
-        if (L->contains(InValI))
-          InVal = LastValueMap[InVal];
+      for (BasicBlock::iterator BBI = (*SI)->begin();
+           PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+        Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+        // If this value was defined in the loop, take the value defined by the
+        // last iteration of the loop.
+        if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+          if (L->contains(InValI))
+            InVal = LastValueMap[InVal];
+        }
+        PN->addIncoming(InVal, LastIterationBB);
       }
-      PN->addIncoming(InVal, LastIterationBB);
     }
   }
 
@@ -352,11 +354,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
       // Replace the conditional branch with an unconditional one.
       BranchInst::Create(Dest, Term);
       Term->eraseFromParent();
-      // Merge adjacent basic blocks, if possible.
-      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+    }
+  }
+
+  // Merge adjacent basic blocks, if possible.
+  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+    if (Term->isUnconditional()) {
+      BasicBlock *Dest = Term->getSuccessor(0);
+      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
-        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
-      }
     }
   }
   
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 000000000000..c1213fac7bc7
--- /dev/null
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+// Weights given to the successor that is expected / not expected to be taken.
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+                   cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+                   cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+  /// Function pass that lowers llvm.expect calls to branch-weight metadata.
+  class LowerExpectIntrinsic : public FunctionPass {
+    /// Weight a switch whose condition is an llvm.expect call.
+    bool HandleSwitchExpect(SwitchInst *SI);
+    /// Weight a conditional branch on 'icmp ne' of an llvm.expect call.
+    bool HandleIfExpect(BranchInst *BI);
+
+  public:
+    static char ID;
+    LowerExpectIntrinsic() : FunctionPass(ID) {
+      initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+    }
+    /// Annotate the terminators of F and erase its llvm.expect calls.
+    bool runOnFunction(Function &F);
+  };
+}
+
+/// HandleSwitchExpect - Weight a switch on llvm.expect; true if SI was changed.
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+  // Only a switch whose condition is a direct llvm.expect call is handled.
+  CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+  if (!CI)
+    return false;
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  // The expected value has to be a constant to single out one successor.
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+
+  // Slot 0 holds the "branch_weights" tag; one weight per case follows it.
+  unsigned NumCases = SI->getNumCases();
+  unsigned ExpectedCase = SI->findCaseValue(ExpectedValue);
+  std::vector<Value *> Weights(NumCases + 1);
+  Weights[0] = MDString::get(Context, "branch_weights");
+  for (unsigned Idx = 0; Idx != NumCases; ++Idx) {
+    uint32_t W = Idx == ExpectedCase ? LikelyBranchWeight
+                                     : UnlikelyBranchWeight;
+    Weights[Idx + 1] = ConstantInt::get(Int32Ty, W);
+  }
+  SI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Weights));
+
+  // The hint is recorded; switch directly on the value that was wrapped.
+  SI->setCondition(CI->getArgOperand(0));
+  return true;
+}
+
+/// HandleIfExpect - Weight a branch on 'icmp ne (llvm.expect ...), constant'.
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+  if (BI->isUnconditional())
+    return false;
+
+  // Handle non-optimized IR code like:
+  //   %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+  //   %tobool = icmp ne i64 %expval, 0
+  //   br i1 %tobool, label %if.then, label %if.end
+  ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+    return false;
+
+  // The direction of the branch can only be predicted when the value is
+  // compared against a constant (normally the zero of a truth test).
+  ConstantInt *CmpRHS = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+  CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+  if (!CmpRHS || !CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+
+  // Successor 0 is taken when the value differs from CmpRHS, so it is the
+  // likely one exactly when the expected value differs from CmpRHS.
+  bool Likely = ExpectedValue->getValue() != CmpRHS->getValue();
+  uint32_t TakenWeight = Likely ? LikelyBranchWeight : UnlikelyBranchWeight;
+  uint32_t OtherWeight = Likely ? UnlikelyBranchWeight : LikelyBranchWeight;
+  Value *Ops[] = {
+    MDString::get(Context, "branch_weights"),
+    ConstantInt::get(Int32Ty, TakenWeight),
+    ConstantInt::get(Int32Ty, OtherWeight)
+  };
+  BI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Ops));
+  CmpI->setOperand(0, CI->getArgOperand(0));
+  return true;
+}
+
+/// runOnFunction - Lower llvm.expect calls in F; return true if F changed.
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+  bool Changed = false; // Set once metadata is attached or a call is erased.
+  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+
+    // Create "branch_weights" metadata.
+    bool Handled = false;
+    if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+      Handled = HandleIfExpect(BI);
+    else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+      Handled = HandleSwitchExpect(SI);
+    if (Handled) {
+      IfHandled++;
+      Changed = true;
+    }
+
+    // Remove llvm.expect intrinsics, forwarding their first argument to users.
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+      CallInst *CI = dyn_cast<CallInst>(BI++);
+      Function *Fn = CI ? CI->getCalledFunction() : 0;
+      if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+        continue;
+
+      CI->replaceAllUsesWith(CI->getArgOperand(0));
+      CI->eraseFromParent();
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+                "Intrinsics", false, false)
+/// createLowerExpectIntrinsicPass - Return a new LowerExpectIntrinsic pass.
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+  return new LowerExpectIntrinsic();
+}
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 025ae0d61696..f77d19de900d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -66,7 +66,7 @@ namespace {
     Constant *AbortFn;
 
     // Used for expensive EH support.
-    const Type *JBLinkTy;
+    StructType *JBLinkTy;
     GlobalVariable *JBListHead;
     Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
     bool useExpensiveEHSupport;
@@ -120,24 +120,16 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
 // doInitialization - Make sure that there is a prototype for abort in the
 // current module.
 bool LowerInvoke::doInitialization(Module &M) {
-  const Type *VoidPtrTy =
-          Type::getInt8PtrTy(M.getContext());
+  const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
   if (useExpensiveEHSupport) {
     // Insert a type for the linked list of jump buffers.
     unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
     JBSize = JBSize ? JBSize : 200;
-    const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
-    { // The type is recursive, so use a type holder.
-      std::vector<const Type*> Elements;
-      Elements.push_back(JmpBufTy);
-      OpaqueType *OT = OpaqueType::get(M.getContext());
-      Elements.push_back(PointerType::getUnqual(OT));
-      PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
-      OT->refineAbstractTypeTo(JBLType.get());  // Complete the cycle.
-      JBLinkTy = JBLType.get();
-      M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
-    }
+    Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+    JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+    Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+    JBLinkTy->setBody(Elts);
 
     const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
 
@@ -184,8 +176,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
       SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
       // Insert a normal call instruction...
       CallInst *NewCall = CallInst::Create(II->getCalledValue(),
-                                           CallArgs.begin(), CallArgs.end(),
-                                           "",II);
+                                           CallArgs, "", II);
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setAttributes(II->getAttributes());
@@ -265,8 +256,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
   // Insert a normal call instruction.
   SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
   CallInst *NewCall = CallInst::Create(II->getCalledValue(),
-                                       CallArgs.begin(), CallArgs.end(), "",
-                                       II);
+                                       CallArgs, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
@@ -573,7 +563,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
              Type::getInt8PtrTy(F.getContext()),
                            "tmp", UnwindBlock);
   Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-  CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
+  CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
   new UnreachableInst(F.getContext(), UnwindBlock);
 
   // Set up the term block ("throw without a catch").
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a1736b931fb4..e5a00f4e9774 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -92,6 +93,22 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
         return false;   // Don't allow a store OF the AI, only INTO the AI.
       if (SI->isVolatile())
         return false;
+    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+          II->getIntrinsicID() != Intrinsic::lifetime_end)
+        return false;
+    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!onlyUsedByLifetimeMarkers(BCI))
+        return false;
+    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+        return false;
+      if (!GEPI->hasAllZeroIndices())
+        return false;
+      if (!onlyUsedByLifetimeMarkers(GEPI))
+        return false;
     } else {
       return false;
     }
@@ -335,6 +352,31 @@ namespace {
   };
 }  // end of anonymous namespace
 
+/// removeLifetimeIntrinsicUsers - Erase every user of AI other than loads and
+/// stores, along with the users of each erased non-void instruction.
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+  // Knowing that this alloca is promotable, we know that it's safe to kill all
+  // instructions except for load and store.
+  for (Value::use_iterator AUI = AI->use_begin(), AUE = AI->use_end();
+       AUI != AUE;) {
+    // Advance before erasing so the iterator never refers to an erased use.
+    Instruction *UserInst = cast<Instruction>(*AUI++);
+    if (isa<LoadInst>(UserInst) || isa<StoreInst>(UserInst))
+      continue;
+
+    if (!UserInst->getType()->isVoidTy()) {
+      // The only users of this bitcast/GEP instruction are lifetime
+      // intrinsics; erase them now instead of leaving them for a later DCE.
+      for (Value::use_iterator MUI = UserInst->use_begin(),
+                               MUE = UserInst->use_end();
+           MUI != MUE;) {
+        Instruction *Marker = cast<Instruction>(*MUI++);
+        Marker->eraseFromParent();
+      }
+    }
+    UserInst->eraseFromParent();
+  }
+}
 
 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
@@ -353,6 +395,8 @@ void PromoteMem2Reg::run() {
     assert(AI->getParent()->getParent() == &F &&
            "All allocas should be in the same function, which is same as DF!");
 
+    removeLifetimeIntrinsicUsers(AI);
+
     if (AI->use_empty()) {
       // If there are no uses of the alloca, just delete it now.
       if (AST) AST->deleteValue(AI);
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b336194a35e3..b47a7ccd80ba 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
@@ -358,8 +357,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
 
 LoadAndStorePromoter::
 LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
-                     SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB,
-                     StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) {
+                     SSAUpdater &S, StringRef BaseName) : SSA(S) {
   if (Insts.empty()) return;
   
   Value *SomeVal;
@@ -407,8 +405,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     if (BlockUses.size() == 1) {
       // If it is a store, it is a trivial def of the value in the block.
       if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-        if (DDI)
-          ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+        updateDebugInfo(SI);
         SSA.AddAvailableValue(BB, SI->getOperand(0));
       } else 
         // Otherwise it is a load, queue it to rewrite as a live-in load.
@@ -462,9 +459,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
       if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
         // If this is a store to an unrelated pointer, ignore it.
         if (!isInstInList(SI, Insts)) continue;
-
-        if (DDI)
-          ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+        updateDebugInfo(SI);
 
         // Remember that this is the active value in the block.
         StoredValue = SI->getOperand(0);
@@ -522,7 +517,4 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     instructionDeleted(User);
     User->eraseFromParent();
   }
-
-  if (DDI)
-    DDI->eraseFromParent();
 }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6df846cbd18f..9d9c324b8468 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2211,8 +2211,7 @@ bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
       SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
       Builder.SetInsertPoint(BI);
       CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                        Args.begin(), Args.end(),
-                                        II->getName());
+                                        Args, II->getName());
       CI->setCallingConv(II->getCallingConv());
       CI->setAttributes(II->getAttributes());
       // If the invoke produced a value, the Call now does instead.
@@ -2355,8 +2354,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
         Builder.SetInsertPoint(BI);
         CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                          Args.begin(), Args.end(),
-                                          II->getName());
+                                          Args, II->getName());
         CI->setCallingConv(II->getCallingConv());
         CI->setAttributes(II->getAttributes());
         // If the invoke produced a value, the call does now instead.
@@ -2450,6 +2448,77 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
   return !DeadCases.empty();
 }
 
+/// FindPHIForConditionForwarding - Look for a phi node in BB's successor whose
+/// incoming value for BB is CaseValue.  BB must look like a block that
+/// TryToSimplifyUncondBranchFromEmptyBlock could fold (empty and terminated by
+/// an unconditional branch) and must have a single predecessor.  On success
+/// return the phi node and set *PhiIndex to BB's index in it; else return NULL.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+                                              BasicBlock *BB,
+                                              int *PhiIndex) {
+  // BB must be empty to be a candidate for simplification, and it must have a
+  // single predecessor so that it is dominated by the switch.
+  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator() ||
+      !BB->getSinglePredecessor())
+    return NULL;
+
+  // Terminator must be unconditional branch.
+  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+  if (!Branch || !Branch->isUnconditional())
+    return NULL;
+
+  BasicBlock *Succ = Branch->getSuccessor(0);
+  for (BasicBlock::iterator It = Succ->begin();
+       PHINode *PHI = dyn_cast<PHINode>(It); ++It) {
+    int Idx = PHI->getBasicBlockIndex(BB);
+    assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+    if (PHI->getIncomingValue(Idx) == CaseValue) {
+      *PhiIndex = Idx;
+      return PHI;
+    }
+  }
+
+  return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - When two or more case destinations feed their
+/// own case value into the same phi node, feed the switch condition in instead,
+/// so that some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+  typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+  ForwardingNodesMap ForwardingNodes;
+
+  // Collect, per phi node, the incoming slots that hold a case's own value.
+  for (unsigned Case = 1; Case < SI->getNumCases(); ++Case) { // 0 is default.
+    int PhiIndex;
+    PHINode *PHI = FindPHIForConditionForwarding(SI->getCaseValue(Case),
+                                                 SI->getSuccessor(Case),
+                                                 &PhiIndex);
+    if (PHI)
+      ForwardingNodes[PHI].push_back(PhiIndex);
+  }
+
+  bool Changed = false;
+  for (ForwardingNodesMap::iterator NI = ForwardingNodes.begin(),
+       NE = ForwardingNodes.end(); NI != NE; ++NI) {
+    PHINode *Phi = NI->first;
+    SmallVector<int,4> &Indexes = NI->second;
+
+    // Only forward when at least two slots of this phi node qualify.
+    if (Indexes.size() < 2)
+      continue;
+
+    // Each recorded slot now receives the switch condition itself.
+    for (size_t Slot = 0, NumSlots = Indexes.size(); Slot != NumSlots; ++Slot)
+      Phi->setIncomingValue(Indexes[Slot], SI->getCondition());
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   // If this switch is too complex to want to look at, ignore it.
   if (!isValueEqualityComparison(SI))
@@ -2486,6 +2555,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (EliminateDeadSwitchCases(SI))
     return SimplifyCFG(BB) | true;
 
+  if (ForwardSwitchConditionToPHI(SI))
+    return SimplifyCFG(BB) | true;
+
   return false;
 }
 
@@ -2530,7 +2602,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
   BasicBlock *BB = BI->getParent();
   
   // If the Terminator is the only non-phi instruction, simplify the block.
-  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+  BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
   if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
       TryToSimplifyUncondBranchFromEmptyBlock(BB))
     return true;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a73bf0449813..973b105a1cbb 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,15 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Type.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
 #include "llvm/Metadata.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
-                      RemapFlags Flags) {
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+                      ValueMapTypeRemapper *TypeMapper) {
   ValueToValueMapTy::iterator I = VM.find(V);
   
   // If the value already exists in the map, use it.
@@ -29,8 +32,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
   
   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
-  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+  if (isa<GlobalValue>(V) || isa<MDString>(V))
     return VM[V] = const_cast<Value*>(V);
+  
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+    // Inline asm may need *type* remapping.
+    FunctionType *NewTy = IA->getFunctionType();
+    if (TypeMapper) {
+      NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+      if (NewTy != IA->getFunctionType())
+        V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+                           IA->hasSideEffects(), IA->isAlignStack());
+    }
+    
+    return VM[V] = const_cast<Value*>(V);
+  }
+  
 
   if (const MDNode *MD = dyn_cast<MDNode>(V)) {
     // If this is a module-level metadata and we know that nothing at the module
@@ -45,14 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     // Check all operands to see if any need to be remapped.
     for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
       Value *OP = MD->getOperand(i);
-      if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
+      if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
 
       // Ok, at least one operand needs remapping.  
       SmallVector<Value*, 4> Elts;
       Elts.reserve(MD->getNumOperands());
       for (i = 0; i != e; ++i) {
         Value *Op = MD->getOperand(i);
-        Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+        Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
       }
       MDNode *NewMD = MDNode::get(V->getContext(), Elts);
       Dummy->replaceAllUsesWith(NewMD);
@@ -75,51 +93,75 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     return 0;
   
   if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
-    Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+    Function *F = 
+      cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
     BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
-                                                       Flags));
+                                                       Flags, TypeMapper));
     return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
   }
   
-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-    Value *Op = C->getOperand(i);
-    Value *Mapped = MapValue(Op, VM, Flags);
-    if (Mapped == C) continue;
-    
-    // Okay, the operands don't all match.  We've already processed some or all
-    // of the operands, set them up now.
-    std::vector<Constant*> Ops;
-    Ops.reserve(C->getNumOperands());
-    for (unsigned j = 0; j != i; ++j)
-      Ops.push_back(cast<Constant>(C->getOperand(i)));
+  // Otherwise, we have some other constant to remap.  Start by checking to see
+  // if all operands have an identity remapping.
+  unsigned OpNo = 0, NumOperands = C->getNumOperands();
+  Value *Mapped = 0;
+  for (; OpNo != NumOperands; ++OpNo) {
+    Value *Op = C->getOperand(OpNo);
+    Mapped = MapValue(Op, VM, Flags, TypeMapper);
+    if (Mapped != C) break;
+  }
+  
+  // See if the type mapper wants to remap the type as well.
+  Type *NewTy = C->getType();
+  if (TypeMapper)
+    NewTy = TypeMapper->remapType(NewTy);
+
+  // If the result type and all operands match up, then just insert an identity
+  // mapping.
+  if (OpNo == NumOperands && NewTy == C->getType())
+    return VM[V] = C;
+  
+  // Okay, we need to create a new constant.  We've already processed some or
+  // all of the operands, set them all up now.
+  SmallVector<Constant*, 8> Ops;
+  Ops.reserve(NumOperands);
+  for (unsigned j = 0; j != OpNo; ++j)
+    Ops.push_back(cast<Constant>(C->getOperand(j)));
+  
+  // If one of the operands mismatch, push it and the other mapped operands.
+  if (OpNo != NumOperands) {
     Ops.push_back(cast<Constant>(Mapped));
-    
+  
     // Map the rest of the operands that aren't processed yet.
-    for (++i; i != e; ++i)
-      Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
-    
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-      return VM[V] = CE->getWithOperands(Ops);
-    if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
-      return VM[V] = ConstantArray::get(CA->getType(), Ops);
-    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
-      return VM[V] = ConstantStruct::get(CS->getType(), Ops);
-    assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
-    return VM[V] = ConstantVector::get(Ops);
+    for (++OpNo; OpNo != NumOperands; ++OpNo)
+      Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+                             Flags, TypeMapper));
   }
-
-  // If we reach here, all of the operands of the constant match.
-  return VM[V] = C;
+  
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    return VM[V] = CE->getWithOperands(Ops, NewTy);
+  if (isa<ConstantArray>(C))
+    return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+  if (isa<ConstantStruct>(C))
+    return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+  if (isa<ConstantVector>(C))
+    return VM[V] = ConstantVector::get(Ops);
+  // If this is a no-operand constant, it must be because the type was remapped.
+  if (isa<UndefValue>(C))
+    return VM[V] = UndefValue::get(NewTy);
+  if (isa<ConstantAggregateZero>(C))
+    return VM[V] = ConstantAggregateZero::get(NewTy);
+  assert(isa<ConstantPointerNull>(C));
+  return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
 }
 
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 ///
 void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
-                            RemapFlags Flags) {
+                            RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
   // Remap operands.
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, VMap, Flags);
+    Value *V = MapValue(*op, VMap, Flags, TypeMapper);
     // If we aren't ignoring missing entries, assert that something happened.
     if (V != 0)
       *op = V;
@@ -128,14 +170,32 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
              "Referenced value not in value map!");
   }
 
-  // Remap attached metadata.
+  // Remap phi nodes' incoming blocks.
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+      // If we aren't ignoring missing entries, assert that something happened.
+      if (V != 0)
+        PN->setIncomingBlock(i, cast<BasicBlock>(V));
+      else
+        assert((Flags & RF_IgnoreMissingEntries) &&
+               "Referenced block not in value map!");
+    }
+  }
+
+  // Remap attached metadata.  Don't bother remapping DebugLoc, it can never
+  // have mappings to do.
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
-  I->getAllMetadata(MDs);
+  I->getAllMetadataOtherThanDebugLoc(MDs);
   for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
        MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
-    Value *Old = MI->second;
-    Value *New = MapValue(Old, VMap, Flags);
+    MDNode *Old = MI->second;
+    MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
     if (New != Old)
-      I->setMetadata(MI->first, cast<MDNode>(New));
+      I->setMetadata(MI->first, New);
   }
+  
+  // If the instruction's type is being remapped, do so now.
+  if (TypeMapper)
+    I->mutateType(TypeMapper->remapType(I->getType()));
 }
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 844284d09c72..94794c35fe0b 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -26,13 +26,11 @@
 #include "llvm/Operator.h"
 #include "llvm/Module.h"
 #include "llvm/ValueSymbolTable.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -42,11 +40,6 @@
 #include <cctype>
 using namespace llvm;
 
-static cl::opt<bool>
-EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden,
-                       cl::desc("Enable debug info comments"));
-
-
 // Make virtual table appear in this compilation unit.
 AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
 
@@ -140,60 +133,60 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
 // TypePrinting Class: Type printing machinery
 //===----------------------------------------------------------------------===//
 
-static DenseMap<const Type *, std::string> &getTypeNamesMap(void *M) {
-  return *static_cast<DenseMap<const Type *, std::string>*>(M);
-}
-
-void TypePrinting::clear() {
-  getTypeNamesMap(TypeNames).clear();
-}
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+  TypePrinting(const TypePrinting &);   // DO NOT IMPLEMENT
+  void operator=(const TypePrinting&);  // DO NOT IMPLEMENT
+public:
 
-bool TypePrinting::hasTypeName(const Type *Ty) const {
-  return getTypeNamesMap(TypeNames).count(Ty);
-}
+  /// NamedTypes - The named types that are used by the current module.
+  std::vector<StructType*> NamedTypes;
+  
+  /// NumberedTypes - The numbered types, along with their value.
+  DenseMap<StructType*, unsigned> NumberedTypes;
+  
 
-void TypePrinting::addTypeName(const Type *Ty, const std::string &N) {
-  getTypeNamesMap(TypeNames).insert(std::make_pair(Ty, N));
-}
+  TypePrinting() {}
+  ~TypePrinting() {}
+  
+  void incorporateTypes(const Module &M);
+  
+  void print(Type *Ty, raw_ostream &OS);
+  
+  void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // end anonymous namespace.
 
 
-TypePrinting::TypePrinting() {
-  TypeNames = new DenseMap<const Type *, std::string>();
+void TypePrinting::incorporateTypes(const Module &M) {
+  M.findUsedStructTypes(NamedTypes);
+  
+  // The list of struct types we got back includes all the struct types.  Split
+  // the unnamed ones out to a numbering and remove the anonymous structs.
+  unsigned NextNumber = 0;
+  
+  std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
+  for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
+    StructType *STy = *I;
+    
+    // Ignore anonymous types.
+    if (STy->isAnonymous())
+      continue;
+    
+    if (STy->getName().empty())
+      NumberedTypes[STy] = NextNumber++;
+    else
+      *NextToUse++ = STy;
+  }
+    
+  NamedTypes.erase(NextToUse, NamedTypes.end());
 }
 
-TypePrinting::~TypePrinting() {
-  delete &getTypeNamesMap(TypeNames);
-}
 
 /// CalcTypeName - Write the specified type to the specified raw_ostream, making
 /// use of type names or up references to shorten the type name where possible.
-void TypePrinting::CalcTypeName(const Type *Ty,
-                                SmallVectorImpl<const Type *> &TypeStack,
-                                raw_ostream &OS, bool IgnoreTopLevelName) {
-  // Check to see if the type is named.
-  if (!IgnoreTopLevelName) {
-    DenseMap<const Type *, std::string> &TM = getTypeNamesMap(TypeNames);
-    DenseMap<const Type *, std::string>::iterator I = TM.find(Ty);
-    if (I != TM.end()) {
-      OS << I->second;
-      return;
-    }
-  }
-
-  // Check to see if the Type is already on the stack...
-  unsigned Slot = 0, CurSize = TypeStack.size();
-  while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type
-
-  // This is another base case for the recursion.  In this case, we know
-  // that we have looped back to a type that we have previously visited.
-  // Generate the appropriate upreference to handle this.
-  if (Slot < CurSize) {
-    OS << '\\' << unsigned(CurSize-Slot);     // Here's the upreference
-    return;
-  }
-
-  TypeStack.push_back(Ty);    // Recursive case: Add us to the stack..
-
+void TypePrinting::print(Type *Ty, raw_ostream &OS) {
   switch (Ty->getTypeID()) {
   case Type::VoidTyID:      OS << "void"; break;
   case Type::FloatTyID:     OS << "float"; break;
@@ -206,259 +199,96 @@ void TypePrinting::CalcTypeName(const Type *Ty,
   case Type::X86_MMXTyID:   OS << "x86_mmx"; break;
   case Type::IntegerTyID:
     OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
-    break;
+    return;
 
   case Type::FunctionTyID: {
-    const FunctionType *FTy = cast<FunctionType>(Ty);
-    CalcTypeName(FTy->getReturnType(), TypeStack, OS);
+    FunctionType *FTy = cast<FunctionType>(Ty);
+    print(FTy->getReturnType(), OS);
     OS << " (";
     for (FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end(); I != E; ++I) {
       if (I != FTy->param_begin())
         OS << ", ";
-      CalcTypeName(*I, TypeStack, OS);
+      print(*I, OS);
     }
     if (FTy->isVarArg()) {
       if (FTy->getNumParams()) OS << ", ";
       OS << "...";
     }
     OS << ')';
-    break;
+    return;
   }
   case Type::StructTyID: {
-    const StructType *STy = cast<StructType>(Ty);
-    if (STy->isPacked())
-      OS << '<';
-    OS << '{';
-    for (StructType::element_iterator I = STy->element_begin(),
-         E = STy->element_end(); I != E; ++I) {
-      OS << ' ';
-      CalcTypeName(*I, TypeStack, OS);
-      if (llvm::next(I) == STy->element_end())
-        OS << ' ';
-      else
-        OS << ',';
-    }
-    OS << '}';
-    if (STy->isPacked())
-      OS << '>';
-    break;
+    StructType *STy = cast<StructType>(Ty);
+    
+    if (STy->isAnonymous())
+      return printStructBody(STy, OS);
+
+    if (!STy->getName().empty())
+      return PrintLLVMName(OS, STy->getName(), LocalPrefix);
+    
+    DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
+    if (I != NumberedTypes.end())
+      OS << '%' << I->second;
+    else  // Not enumerated, print the hex address.
+      OS << "%\"type 0x" << STy << '\"';
+    return;
   }
   case Type::PointerTyID: {
-    const PointerType *PTy = cast<PointerType>(Ty);
-    CalcTypeName(PTy->getElementType(), TypeStack, OS);
+    PointerType *PTy = cast<PointerType>(Ty);
+    print(PTy->getElementType(), OS);
     if (unsigned AddressSpace = PTy->getAddressSpace())
       OS << " addrspace(" << AddressSpace << ')';
     OS << '*';
-    break;
+    return;
   }
   case Type::ArrayTyID: {
-    const ArrayType *ATy = cast<ArrayType>(Ty);
+    ArrayType *ATy = cast<ArrayType>(Ty);
     OS << '[' << ATy->getNumElements() << " x ";
-    CalcTypeName(ATy->getElementType(), TypeStack, OS);
+    print(ATy->getElementType(), OS);
     OS << ']';
-    break;
+    return;
   }
   case Type::VectorTyID: {
-    const VectorType *PTy = cast<VectorType>(Ty);
+    VectorType *PTy = cast<VectorType>(Ty);
     OS << "<" << PTy->getNumElements() << " x ";
-    CalcTypeName(PTy->getElementType(), TypeStack, OS);
+    print(PTy->getElementType(), OS);
     OS << '>';
-    break;
+    return;
   }
-  case Type::OpaqueTyID:
-    OS << "opaque";
-    break;
   default:
     OS << "<unrecognized-type>";
-    break;
+    return;
   }
-
-  TypeStack.pop_back();       // Remove self from stack.
 }
 
-/// printTypeInt - The internal guts of printing out a type that has a
-/// potentially named portion.
-///
-void TypePrinting::print(const Type *Ty, raw_ostream &OS,
-                         bool IgnoreTopLevelName) {
-  // Check to see if the type is named.
-  DenseMap<const Type*, std::string> &TM = getTypeNamesMap(TypeNames);
-  if (!IgnoreTopLevelName) {
-    DenseMap<const Type*, std::string>::iterator I = TM.find(Ty);
-    if (I != TM.end()) {
-      OS << I->second;
-      return;
-    }
+void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
+  if (STy->isOpaque()) {
+    OS << "opaque";
+    return;
   }
-
-  // Otherwise we have a type that has not been named but is a derived type.
-  // Carefully recurse the type hierarchy to print out any contained symbolic
-  // names.
-  SmallVector<const Type *, 16> TypeStack;
-  std::string TypeName;
-
-  raw_string_ostream TypeOS(TypeName);
-  CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName);
-  OS << TypeOS.str();
-
-  // Cache type name for later use.
-  if (!IgnoreTopLevelName)
-    TM.insert(std::make_pair(Ty, TypeOS.str()));
-}
-
-namespace {
-  class TypeFinder {
-    // To avoid walking constant expressions multiple times and other IR
-    // objects, we keep several helper maps.
-    DenseSet<const Value*> VisitedConstants;
-    DenseSet<const Type*> VisitedTypes;
-
-    TypePrinting &TP;
-    std::vector<const Type*> &NumberedTypes;
-  public:
-    TypeFinder(TypePrinting &tp, std::vector<const Type*> &numberedTypes)
-      : TP(tp), NumberedTypes(numberedTypes) {}
-
-    void Run(const Module &M) {
-      // Get types from the type symbol table.  This gets opaque types referened
-      // only through derived named types.
-      const TypeSymbolTable &ST = M.getTypeSymbolTable();
-      for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
-           TI != E; ++TI)
-        IncorporateType(TI->second);
-
-      // Get types from global variables.
-      for (Module::const_global_iterator I = M.global_begin(),
-           E = M.global_end(); I != E; ++I) {
-        IncorporateType(I->getType());
-        if (I->hasInitializer())
-          IncorporateValue(I->getInitializer());
-      }
-
-      // Get types from aliases.
-      for (Module::const_alias_iterator I = M.alias_begin(),
-           E = M.alias_end(); I != E; ++I) {
-        IncorporateType(I->getType());
-        IncorporateValue(I->getAliasee());
-      }
-
-      // Get types from functions.
-      for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
-        IncorporateType(FI->getType());
-
-        for (Function::const_iterator BB = FI->begin(), E = FI->end();
-             BB != E;++BB)
-          for (BasicBlock::const_iterator II = BB->begin(),
-               E = BB->end(); II != E; ++II) {
-            const Instruction &I = *II;
-            // Incorporate the type of the instruction and all its operands.
-            IncorporateType(I.getType());
-            for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
-                 OI != OE; ++OI)
-              IncorporateValue(*OI);
-          }
-      }
-    }
-
-  private:
-    void IncorporateType(const Type *Ty) {
-      // Check to see if we're already visited this type.
-      if (!VisitedTypes.insert(Ty).second)
-        return;
-
-      // If this is a structure or opaque type, add a name for the type.
-      if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements())
-            || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) {
-        TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
-        NumberedTypes.push_back(Ty);
-      }
-
-      // Recursively walk all contained types.
-      for (Type::subtype_iterator I = Ty->subtype_begin(),
-           E = Ty->subtype_end(); I != E; ++I)
-        IncorporateType(*I);
-    }
-
-    /// IncorporateValue - This method is used to walk operand lists finding
-    /// types hiding in constant expressions and other operands that won't be
-    /// walked in other ways.  GlobalValues, basic blocks, instructions, and
-    /// inst operands are all explicitly enumerated.
-    void IncorporateValue(const Value *V) {
-      if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return;
-
-      // Already visited?
-      if (!VisitedConstants.insert(V).second)
-        return;
-
-      // Check this type.
-      IncorporateType(V->getType());
-
-      // Look in operands for types.
-      const Constant *C = cast<Constant>(V);
-      for (Constant::const_op_iterator I = C->op_begin(),
-           E = C->op_end(); I != E;++I)
-        IncorporateValue(*I);
-    }
-  };
-} // end anonymous namespace
-
-
-/// AddModuleTypesToPrinter - Add all of the symbolic type names for types in
-/// the specified module to the TypePrinter and all numbered types to it and the
-/// NumberedTypes table.
-static void AddModuleTypesToPrinter(TypePrinting &TP,
-                                    std::vector<const Type*> &NumberedTypes,
-                                    const Module *M) {
-  if (M == 0) return;
-
-  // If the module has a symbol table, take all global types and stuff their
-  // names into the TypeNames map.
-  const TypeSymbolTable &ST = M->getTypeSymbolTable();
-  for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
-       TI != E; ++TI) {
-    const Type *Ty = cast<Type>(TI->second);
-
-    // As a heuristic, don't insert pointer to primitive types, because
-    // they are used too often to have a single useful name.
-    if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
-      const Type *PETy = PTy->getElementType();
-      if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) &&
-          !PETy->isOpaqueTy())
-        continue;
+  
+  if (STy->isPacked())
+    OS << '<';
+  
+  if (STy->getNumElements() == 0) {
+    OS << "{}";
+  } else {
+    StructType::element_iterator I = STy->element_begin();
+    OS << "{ ";
+    print(*I++, OS);
+    for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
+      OS << ", ";
+      print(*I, OS);
     }
-
-    // Likewise don't insert primitives either.
-    if (Ty->isIntegerTy() || Ty->isPrimitiveType())
-      continue;
-
-    // Get the name as a string and insert it into TypeNames.
-    std::string NameStr;
-    raw_string_ostream NameROS(NameStr);
-    formatted_raw_ostream NameOS(NameROS);
-    PrintLLVMName(NameOS, TI->first, LocalPrefix);
-    NameOS.flush();
-    TP.addTypeName(Ty, NameStr);
+  
+    OS << " }";
   }
-
-  // Walk the entire module to find references to unnamed structure and opaque
-  // types.  This is required for correctness by opaque types (because multiple
-  // uses of an unnamed opaque type needs to be referred to by the same ID) and
-  // it shrinks complex recursive structure types substantially in some cases.
-  TypeFinder(TP, NumberedTypes).Run(*M);
+  if (STy->isPacked())
+    OS << '>';
 }
 
 
-/// WriteTypeSymbolic - This attempts to write the specified type as a symbolic
-/// type, iff there is an entry in the modules symbol table for the specified
-/// type or one of it's component types.
-///
-void llvm::WriteTypeSymbolic(raw_ostream &OS, const Type *Ty, const Module *M) {
-  TypePrinting Printer;
-  std::vector<const Type*> NumberedTypes;
-  AddModuleTypesToPrinter(Printer, NumberedTypes, M);
-  Printer.print(Ty, OS);
-}
 
 //===----------------------------------------------------------------------===//
 // SlotTracker Class: Enumerate slot numbers for unnamed values
@@ -481,11 +311,11 @@ private:
   const Function* TheFunction;
   bool FunctionProcessed;
 
-  /// mMap - The TypePlanes map for the module level data.
+  /// mMap - The slot map for the module level data.
   ValueMap mMap;
   unsigned mNext;
 
-  /// fMap - The TypePlanes map for the function level data.
+  /// fMap - The slot map for the function level data.
   ValueMap fMap;
   unsigned fNext;
 
@@ -706,7 +536,7 @@ int SlotTracker::getGlobalSlot(const GlobalValue *V) {
   // Check for uninitialized state and do lazy initialization.
   initialize();
 
-  // Find the type plane in the module map
+  // Find the value in the module map
   ValueMap::iterator MI = mMap.find(V);
   return MI == mMap.end() ? -1 : (int)MI->second;
 }
@@ -716,7 +546,7 @@ int SlotTracker::getMetadataSlot(const MDNode *N) {
   // Check for uninitialized state and do lazy initialization.
   initialize();
 
-  // Find the type plane in the module map
+  // Find the MDNode in the module map
   mdn_iterator MI = mdnMap.find(N);
   return MI == mdnMap.end() ? -1 : (int)MI->second;
 }
@@ -978,7 +808,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     // As a special case, print the array as a string if it is an array of
     // i8 with ConstantInt values.
     //
-    const Type *ETy = CA->getType()->getElementType();
+    Type *ETy = CA->getType()->getElementType();
     if (CA->isString()) {
       Out << "c\"";
       PrintEscapedString(CA->getAsString(), Out);
@@ -1035,7 +865,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
   }
 
   if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
-    const Type *ETy = CP->getType()->getElementType();
+    Type *ETy = CP->getType()->getElementType();
     assert(CP->getNumOperands() > 0 &&
            "Number of operands for a PackedConst must be > 0");
     Out << '<';
@@ -1233,8 +1063,8 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
   if (Context == 0) Context = getModuleFromVal(V);
 
   TypePrinting TypePrinter;
-  std::vector<const Type*> NumberedTypes;
-  AddModuleTypesToPrinter(TypePrinter, NumberedTypes, Context);
+  if (Context)
+    TypePrinter.incorporateTypes(*Context);
   if (PrintType) {
     TypePrinter.print(V->getType(), Out);
     Out << ' ';
@@ -1251,14 +1081,14 @@ class AssemblyWriter {
   const Module *TheModule;
   TypePrinting TypePrinter;
   AssemblyAnnotationWriter *AnnotationWriter;
-  std::vector<const Type*> NumberedTypes;
   
 public:
   inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
                         const Module *M,
                         AssemblyAnnotationWriter *AAW)
     : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
-    AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M);
+    if (M)
+      TypePrinter.incorporateTypes(*M);
   }
 
   void printMDNodeBody(const MDNode *MD);
@@ -1271,7 +1101,7 @@ public:
 
   void writeAllMDNodes();
 
-  void printTypeSymbolTable(const TypeSymbolTable &ST);
+  void printTypeIdentities();
   void printGlobal(const GlobalVariable *GV);
   void printAlias(const GlobalAlias *GV);
   void printFunction(const Function *F);
@@ -1366,9 +1196,7 @@ void AssemblyWriter::printModule(const Module *M) {
     Out << " ]";
   }
 
-  // Loop over the symbol table, emitting all id'd types.
-  if (!M->getTypeSymbolTable().empty() || !NumberedTypes.empty()) Out << '\n';
-  printTypeSymbolTable(M->getTypeSymbolTable());
+  printTypeIdentities();
 
   // Output all globals.
   if (!M->global_empty()) Out << '\n';
@@ -1401,7 +1229,25 @@ void AssemblyWriter::printModule(const Module *M) {
 }
 
 void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
-  Out << "!" << NMD->getName() << " = !{";
+  Out << '!';
+  StringRef Name = NMD->getName();
+  if (Name.empty()) {
+    Out << "<empty name> ";
+  } else {
+    if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' ||
+        Name[0] == '.' || Name[0] == '_')
+      Out << Name[0];
+    else
+      Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+    for (unsigned i = 1, e = Name.size(); i != e; ++i) {
+      unsigned char C = Name[i];
+      if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_')
+        Out << C;
+      else
+        Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+    }
+  }
+  Out << " = !{";
   for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
     if (i) Out << ", ";
     int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
@@ -1508,7 +1354,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
 
   const Constant *Aliasee = GA->getAliasee();
 
-  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
+  if (Aliasee == 0) {
+    TypePrinter.print(GA->getType(), Out);
+    Out << " <<NULL ALIASEE>>";
+  } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
     TypePrinter.print(GV->getType(), Out);
     Out << ' ';
     PrintLLVMName(Out, GV);
@@ -1534,26 +1383,40 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
   Out << '\n';
 }
 
-void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) {
+void AssemblyWriter::printTypeIdentities() {
+  if (TypePrinter.NumberedTypes.empty() &&
+      TypePrinter.NamedTypes.empty())
+    return;
+  
+  Out << '\n';
+  
+  // We know the number assigned to each numbered type and we know that the
+  // numbering is dense.  Convert the map to an index table.
+  std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
+  for (DenseMap<StructType*, unsigned>::iterator I = 
+       TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
+       I != E; ++I) {
+    assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
+    NumberedTypes[I->second] = I->first;
+  }
+           
   // Emit all numbered types.
   for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
     Out << '%' << i << " = type ";
-
+    
     // Make sure we print out at least one level of the type structure, so
     // that we do not get %2 = type %2
-    TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out);
+    TypePrinter.printStructBody(NumberedTypes[i], Out);
     Out << '\n';
   }
-
-  // Print the named types.
-  for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end();
-       TI != TE; ++TI) {
-    PrintLLVMName(Out, TI->first, LocalPrefix);
+  
+  for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
+    PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
     Out << " = type ";
 
     // Make sure we print out at least one level of the type structure, so
     // that we do not get %FILE = type %FILE
-    TypePrinter.printAtLeastOneLevel(TI->second, Out);
+    TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
     Out << '\n';
   }
 }
@@ -1735,18 +1598,6 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
   if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
 }
 
-/// printDebugLoc - Print DebugLoc.
-static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
-  OS << DL.getLine() << ":" << DL.getCol();
-  if (MDNode *N = DL.getInlinedAt(getGlobalContext())) {
-    DebugLoc IDL = DebugLoc::getFromDILocation(N);
-    if (!IDL.isUnknown()) {
-      OS << "@";
-      printDebugLoc(IDL,OS);
-    }
-  }
-}
-
 /// printInfoComment - Print a little comment after the instruction indicating
 /// which slot it occupies.
 ///
@@ -1754,43 +1605,6 @@ void AssemblyWriter::printInfoComment(const Value &V) {
   if (AnnotationWriter) {
     AnnotationWriter->printInfoComment(V, Out);
     return;
-  } else if (EnableDebugInfoComment) {
-    bool Padded = false;
-    if (const Instruction *I = dyn_cast<Instruction>(&V)) {
-      const DebugLoc &DL = I->getDebugLoc();
-      if (!DL.isUnknown()) {
-        if (!Padded) {
-          Out.PadToColumn(50);
-          Padded = true;
-          Out << ";";
-        }
-        Out << " [debug line = ";
-        printDebugLoc(DL,Out);
-        Out << "]";
-      }
-      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
-        const MDNode *Var = DDI->getVariable();
-        if (!Padded) {
-          Out.PadToColumn(50);
-          Padded = true;
-          Out << ";";
-        }
-        if (Var && Var->getNumOperands() >= 2)
-          if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
-            Out << " [debug variable = " << MDS->getString() << "]";
-      }
-      else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
-        const MDNode *Var = DVI->getVariable();
-        if (!Padded) {
-          Out.PadToColumn(50);
-          Padded = true;
-          Out << ";";
-        }
-        if (Var && Var->getNumOperands() >= 2)
-          if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
-            Out << " [debug variable = " << MDS->getString() << "]";
-      }
-    }
   }
 }
 
@@ -1873,16 +1687,16 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       writeOperand(I.getOperand(i), true);
     }
     Out << ']';
-  } else if (isa<PHINode>(I)) {
+  } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
     Out << ' ';
     TypePrinter.print(I.getType(), Out);
     Out << ' ';
 
-    for (unsigned op = 0, Eop = I.getNumOperands(); op < Eop; op += 2) {
+    for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
       if (op) Out << ", ";
       Out << "[ ";
-      writeOperand(I.getOperand(op  ), false); Out << ", ";
-      writeOperand(I.getOperand(op+1), false); Out << " ]";
+      writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+      writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
     }
   } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
     Out << ' ';
@@ -1916,9 +1730,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     }
 
     Operand = CI->getCalledValue();
-    const PointerType    *PTy = cast<PointerType>(Operand->getType());
-    const FunctionType   *FTy = cast<FunctionType>(PTy->getElementType());
-    const Type         *RetTy = FTy->getReturnType();
+    PointerType *PTy = cast<PointerType>(Operand->getType());
+    FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+    Type *RetTy = FTy->getReturnType();
     const AttrListPtr &PAL = CI->getAttributes();
 
     if (PAL.getRetAttributes() != Attribute::None)
@@ -1949,9 +1763,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
     Operand = II->getCalledValue();
-    const PointerType    *PTy = cast<PointerType>(Operand->getType());
-    const FunctionType   *FTy = cast<FunctionType>(PTy->getElementType());
-    const Type         *RetTy = FTy->getReturnType();
+    PointerType *PTy = cast<PointerType>(Operand->getType());
+    FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+    Type *RetTy = FTy->getReturnType();
     const AttrListPtr &PAL = II->getAttributes();
 
     // Print the calling convention being used.
@@ -2034,7 +1848,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     // omit the type from all but the first operand.  If the instruction has
     // different type operands (for example br), then they are all printed.
     bool PrintAllTypes = false;
-    const Type *TheType = Operand->getType();
+    Type *TheType = Operand->getType();
 
     // Select, Store and ShuffleVector always print all types.
     if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
@@ -2154,7 +1968,15 @@ void Type::print(raw_ostream &OS) const {
     OS << "<null Type>";
     return;
   }
-  TypePrinting().print(this, OS);
+  TypePrinting TP;
+  TP.print(const_cast<Type*>(this), OS);
+  
+  // If the type is a named struct type, print the body as well.
+  if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
+    if (!STy->isAnonymous()) {
+      OS << " = type ";
+      TP.printStructBody(STy, OS);
+    }
 }
 
 void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
@@ -2210,14 +2032,7 @@ void Value::printCustom(raw_ostream &OS) const {
 void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
 
 // Type::dump - allow easy printing of Types from the debugger.
-// This one uses type names from the given context module
-void Type::dump(const Module *Context) const {
-  WriteTypeSymbolic(dbgs(), this, Context);
-  dbgs() << '\n';
-}
-
-// Type::dump - allow easy printing of Types from the debugger.
-void Type::dump() const { dump(0); }
+void Type::dump() const { print(dbgs()); dbgs() << '\n'; }
 
 // Module::dump() - Allow printing of Modules from the debugger.
 void Module::dump() const { print(dbgs(), 0); }
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index ee257dbde5f3..bf6efa1645a2 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -74,6 +74,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
     Result += "naked ";
   if (Attrs & Attribute::Hotpatch)
     Result += "hotpatch ";
+  if (Attrs & Attribute::NonLazyBind)
+    Result += "nonlazybind ";
   if (Attrs & Attribute::StackAlignment) {
     Result += "alignstack(";
     Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index f8f15caec91d..9e93ff370e25 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -28,558 +28,77 @@ using namespace llvm;
 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   assert(F && "Illegal to upgrade a non-existent Function.");
 
-  // Get the Function's name.
-  const std::string& Name = F->getName();
-
-  // Convenience
-  const FunctionType *FTy = F->getFunctionType();
-
   // Quickly eliminate it, if it's not a candidate.
-  if (Name.length() <= 8 || Name[0] != 'l' || Name[1] != 'l' || 
-      Name[2] != 'v' || Name[3] != 'm' || Name[4] != '.')
+  StringRef Name = F->getName();
+  if (Name.size() <= 8 || !Name.startswith("llvm."))
     return false;
+  Name = Name.substr(5); // Strip off "llvm."
 
+  const FunctionType *FTy = F->getFunctionType();
   Module *M = F->getParent();
-  switch (Name[5]) {
+  
+  switch (Name[0]) {
   default: break;
-  case 'a':
-    // This upgrades the llvm.atomic.lcs, llvm.atomic.las, llvm.atomic.lss,
-    // and atomics with default address spaces to their new names to their new
-    // function name (e.g. llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32)
-    if (Name.compare(5,7,"atomic.",7) == 0) {
-      if (Name.compare(12,3,"lcs",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) +
-                   ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.compare(12,3,"las",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.load.add"+Name.substr(delim)
-                   + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.compare(12,3,"lss",3) == 0) {
-        std::string::size_type delim = Name.find('.',12);
-        F->setName("llvm.atomic.load.sub"+Name.substr(delim)
-                   + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-      else if (Name.rfind(".p") == std::string::npos) {
-        // We don't have an address space qualifier so this has be upgraded
-        // to the new name.  Copy the type name at the end of the intrinsic
-        // and add to it
-        std::string::size_type delim = Name.find_last_of('.');
-        assert(delim != std::string::npos && "can not find type");
-        F->setName(Name + ".p0" + Name.substr(delim+1));
-        NewFn = F;
-        return true;
-      }
-    } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
-      if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
-            Name.compare(14, 5, "vaddl", 5) == 0 ||
-            Name.compare(14, 5, "vsubl", 5) == 0 ||
-            Name.compare(14, 5, "vaddw", 5) == 0 ||
-            Name.compare(14, 5, "vsubw", 5) == 0 ||
-            Name.compare(14, 5, "vmlal", 5) == 0 ||
-            Name.compare(14, 5, "vmlsl", 5) == 0 ||
-            Name.compare(14, 5, "vabdl", 5) == 0 ||
-            Name.compare(14, 5, "vabal", 5) == 0) &&
-           (Name.compare(19, 2, "s.", 2) == 0 ||
-            Name.compare(19, 2, "u.", 2) == 0)) ||
-
-          (Name.compare(14, 4, "vaba", 4) == 0 &&
-           (Name.compare(18, 2, "s.", 2) == 0 ||
-            Name.compare(18, 2, "u.", 2) == 0)) ||
-
-          (Name.compare(14, 6, "vmovn.", 6) == 0)) {
-
-        // Calls to these are transformed into IR without intrinsics.
-        NewFn = 0;
-        return true;
-      }
-      // Old versions of NEON ld/st intrinsics are missing alignment arguments.
-      bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
-      bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
-      if (isVLd || isVSt) {
-        unsigned NumVecs = Name.at(17) - '0';
-        if (NumVecs == 0 || NumVecs > 4)
-          return false;
-        bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
-        if (!isLaneOp && Name.at(18) != '.')
-          return false;
-        unsigned ExpectedArgs = 2; // for the address and alignment
-        if (isVSt || isLaneOp)
-          ExpectedArgs += NumVecs;
-        if (isLaneOp)
-          ExpectedArgs += 1; // for the lane number
-        unsigned NumP = FTy->getNumParams();
-        if (NumP != ExpectedArgs - 1)
-          return false;
-
-        // Change the name of the old (bad) intrinsic, because 
-        // its type is incorrect, but we cannot overload that name.
-        F->setName("");
-
-        // One argument is missing: add the alignment argument.
-        std::vector<const Type*> NewParams;
-        for (unsigned p = 0; p < NumP; ++p)
-          NewParams.push_back(FTy->getParamType(p));
-        NewParams.push_back(Type::getInt32Ty(F->getContext()));
-        FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
-                                                 NewParams, false);
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
-        return true;
-      }
-    }
-    break;
-  case 'b':
-    //  This upgrades the name of the llvm.bswap intrinsic function to only use 
-    //  a single type name for overloading. We only care about the old format
-    //  'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being 
-    //  a '.' after 'bswap.'
-    if (Name.compare(5,6,"bswap.",6) == 0) {
-      std::string::size_type delim = Name.find('.',11);
-      
-      if (delim != std::string::npos) {
-        //  Construct the new name as 'llvm.bswap' + '.i*'
-        F->setName(Name.substr(0,10)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-    }
-    break;
+  case 'p':
+    //  This upgrades the llvm.prefetch intrinsic to accept one more parameter,
+    //  which is an instruction / data cache identifier. The old version only
+    //  implicitly accepted the data version.
+    if (Name == "prefetch") {
+      // Don't do anything if it has the correct number of arguments already
+      if (FTy->getNumParams() == 4)
+        break;
 
-  case 'c':
-    //  We only want to fix the 'llvm.ct*' intrinsics which do not have the 
-    //  correct return type, so we check for the name, and then check if the 
-    //  return type does not match the parameter type.
-    if ( (Name.compare(5,5,"ctpop",5) == 0 ||
-          Name.compare(5,4,"ctlz",4) == 0 ||
-          Name.compare(5,4,"cttz",4) == 0) &&
-        FTy->getReturnType() != FTy->getParamType(0)) {
-      //  We first need to change the name of the old (bad) intrinsic, because 
-      //  its type is incorrect, but we cannot overload that name. We 
-      //  arbitrarily unique it here allowing us to construct a correctly named 
+      assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
+      //  We first need to change the name of the old (bad) intrinsic, because
+      //  its type is incorrect, but we cannot overload that name. We
+      //  arbitrarily unique it here allowing us to construct a correctly named
       //  and typed function below.
+      std::string NameTmp = F->getName();
       F->setName("");
-
-      //  Now construct the new intrinsic with the correct name and type. We 
-      //  leave the old function around in order to query its type, whatever it 
-      //  may be, and correctly convert up to the new type.
-      NewFn = cast<Function>(M->getOrInsertFunction(Name, 
+      NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
+                                                    FTy->getReturnType(),
                                                     FTy->getParamType(0),
-                                                    FTy->getParamType(0),
-                                                    (Type *)0));
+                                                    FTy->getParamType(1),
+                                                    FTy->getParamType(2),
+                                                    FTy->getParamType(2),
+                                                    (Type*)0));
       return true;
     }
-    break;
 
-  case 'e':
-    //  The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector.
-    if (Name.compare("llvm.eh.selector.i32") == 0) {
-      F->setName("llvm.eh.selector");
-      NewFn = F;
-      return true;
-    }
-    //  The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for.
-    if (Name.compare("llvm.eh.typeid.for.i32") == 0) {
-      F->setName("llvm.eh.typeid.for");
-      NewFn = F;
-      return true;
-    }
-    //  Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector.
-    if (Name.compare("llvm.eh.selector.i64") == 0) {
-      NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector);
-      return true;
-    }
-    //  Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for.
-    if (Name.compare("llvm.eh.typeid.for.i64") == 0) {
-      NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for);
-      return true;
-    }
     break;
-
-  case 'm': {
-    // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the
-    // new format that allows overloading the pointer for different address
-    // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16)
-    const char* NewFnName = NULL;
-    if (Name.compare(5,8,"memcpy.i",8) == 0) {
-      if (Name[13] == '8')
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i8";
-      else if (Name.compare(13,2,"16") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i16";
-      else if (Name.compare(13,2,"32") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i32";
-      else if (Name.compare(13,2,"64") == 0)
-        NewFnName = "llvm.memcpy.p0i8.p0i8.i64";
-    } else if (Name.compare(5,9,"memmove.i",9) == 0) {
-      if (Name[14] == '8')
-        NewFnName = "llvm.memmove.p0i8.p0i8.i8";
-      else if (Name.compare(14,2,"16") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i16";
-      else if (Name.compare(14,2,"32") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i32";
-      else if (Name.compare(14,2,"64") == 0)
-        NewFnName = "llvm.memmove.p0i8.p0i8.i64";
-    }
-    else if (Name.compare(5,8,"memset.i",8) == 0) {
-      if (Name[13] == '8')
-        NewFnName = "llvm.memset.p0i8.i8";
-      else if (Name.compare(13,2,"16") == 0)
-        NewFnName = "llvm.memset.p0i8.i16";
-      else if (Name.compare(13,2,"32") == 0)
-        NewFnName = "llvm.memset.p0i8.i32";
-      else if (Name.compare(13,2,"64") == 0)
-        NewFnName = "llvm.memset.p0i8.i64";
-    }
+  case 'x': {
+    const char *NewFnName = NULL;
+    // This fixes the poorly named crc32 intrinsics.
+    if (Name == "x86.sse42.crc32.8")
+      NewFnName = "llvm.x86.sse42.crc32.32.8";
+    else if (Name == "x86.sse42.crc32.16")
+      NewFnName = "llvm.x86.sse42.crc32.32.16";
+    else if (Name == "x86.sse42.crc32.32")
+      NewFnName = "llvm.x86.sse42.crc32.32.32";
+    else if (Name == "x86.sse42.crc64.8")
+      NewFnName = "llvm.x86.sse42.crc32.64.8";
+    else if (Name == "x86.sse42.crc64.64")
+      NewFnName = "llvm.x86.sse42.crc32.64.64";
+    
     if (NewFnName) {
-      NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, 
-                                            FTy->getReturnType(),
-                                            FTy->getParamType(0),
-                                            FTy->getParamType(1),
-                                            FTy->getParamType(2),
-                                            FTy->getParamType(3),
-                                            Type::getInt1Ty(F->getContext()),
-                                            (Type *)0));
+      F->setName(NewFnName);
+      NewFn = F;
       return true;
     }
-    break;
-  }
-  case 'p':
-    //  This upgrades the llvm.part.select overloaded intrinsic names to only 
-    //  use one type specifier in the name. We only care about the old format
-    //  'llvm.part.select.i*.i*', and solve as above with bswap.
-    if (Name.compare(5,12,"part.select.",12) == 0) {
-      std::string::size_type delim = Name.find('.',17);
-      
-      if (delim != std::string::npos) {
-        //  Construct a new name as 'llvm.part.select' + '.i*'
-        F->setName(Name.substr(0,16)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-      break;
-    }
 
-    //  This upgrades the llvm.part.set intrinsics similarly as above, however 
-    //  we care about 'llvm.part.set.i*.i*.i*', but only the first two types 
-    //  must match. There is an additional type specifier after these two 
-    //  matching types that we must retain when upgrading.  Thus, we require 
-    //  finding 2 periods, not just one, after the intrinsic name.
-    if (Name.compare(5,9,"part.set.",9) == 0) {
-      std::string::size_type delim = Name.find('.',14);
-
-      if (delim != std::string::npos &&
-          Name.find('.',delim+1) != std::string::npos) {
-        //  Construct a new name as 'llvm.part.select' + '.i*.i*'
-        F->setName(Name.substr(0,13)+Name.substr(delim));
-        NewFn = F;
-        return true;
-      }
-      break;
-    }
-
-    break;
-  case 'x':
-    // This fixes the poorly named crc32 intrinsics
-    if (Name.compare(5, 13, "x86.sse42.crc", 13) == 0) {
-      const char* NewFnName = NULL;
-      if (Name.compare(18, 2, "32", 2) == 0) {
-        if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
-          NewFnName = "llvm.x86.sse42.crc32.32.8";
-        } else if (Name.compare(20, 3, ".16") == 0 && Name.length() == 23) {
-          NewFnName = "llvm.x86.sse42.crc32.32.16";
-        } else if (Name.compare(20, 3, ".32") == 0 && Name.length() == 23) {
-          NewFnName = "llvm.x86.sse42.crc32.32.32";
-        }
-      }
-      else if (Name.compare(18, 2, "64", 2) == 0) {
-        if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
-          NewFnName = "llvm.x86.sse42.crc32.64.8";
-        } else if (Name.compare(20, 3, ".64") == 0 && Name.length() == 23) {
-          NewFnName = "llvm.x86.sse42.crc32.64.64";
-        }
-      }
-      if (NewFnName) {
-        F->setName(NewFnName);
-        NewFn = F;
-        return true;
-      }
-    }
-
-    // This fixes all MMX shift intrinsic instructions to take a
-    // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8.
-    if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
-      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-
-      if (Name.compare(13, 4, "padd", 4) == 0   ||
-          Name.compare(13, 4, "psub", 4) == 0   ||
-          Name.compare(13, 4, "pmul", 4) == 0   ||
-          Name.compare(13, 5, "pmadd", 5) == 0  ||
-          Name.compare(13, 4, "pand", 4) == 0   ||
-          Name.compare(13, 3, "por", 3) == 0    ||
-          Name.compare(13, 4, "pxor", 4) == 0   ||
-          Name.compare(13, 4, "pavg", 4) == 0   ||
-          Name.compare(13, 4, "pmax", 4) == 0   ||
-          Name.compare(13, 4, "pmin", 4) == 0   ||
-          Name.compare(13, 4, "psad", 4) == 0   ||
-          Name.compare(13, 4, "psll", 4) == 0   ||
-          Name.compare(13, 4, "psrl", 4) == 0   ||
-          Name.compare(13, 4, "psra", 4) == 0   ||
-          Name.compare(13, 4, "pack", 4) == 0   ||
-          Name.compare(13, 6, "punpck", 6) == 0 ||
-          Name.compare(13, 4, "pcmp", 4) == 0) {
-        assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
-        const Type *SecondParamTy = X86_MMXTy;
-
-        if (Name.compare(13, 5, "pslli", 5) == 0 ||
-            Name.compare(13, 5, "psrli", 5) == 0 ||
-            Name.compare(13, 5, "psrai", 5) == 0)
-          SecondParamTy = FTy->getParamType(1);
-
-        // Don't do anything if it has the correct types.
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == SecondParamTy)
-          break;
-
-        // We first need to change the name of the old (bad) intrinsic, because
-        // its type is incorrect, but we cannot overload that name. We
-        // arbitrarily unique it here allowing us to construct a correctly named
-        // and typed function below.
-        F->setName("");
-
-        // Now construct the new intrinsic with the correct name and type. We
-        // leave the old function around in order to query its type, whatever it
-        // may be, and correctly convert up to the new type.
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy, X86_MMXTy,
-                                                      SecondParamTy, (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "maskmovq", 8) == 0) {
-        // Don't do anything if it has the correct types.
-        if (FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "movnt", 5) == 0) {
-        if (FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      FTy->getParamType(0),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 7, "palignr", 7) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy &&
-            FTy->getParamType(1) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "pextr", 5) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 5, "pinsr", 5) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      FTy->getParamType(2),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(0),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
-        if (FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      FTy->getReturnType(),
-                                                      X86_MMXTy,
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 8, "vec.init", 8) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy)
-          break;
-
-        F->setName("");
-
-        if (Name.compare(21, 2, ".b", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        FTy->getParamType(2),
-                                                        FTy->getParamType(3),
-                                                        FTy->getParamType(4),
-                                                        FTy->getParamType(5),
-                                                        FTy->getParamType(6),
-                                                        FTy->getParamType(7),
-                                                        (Type*)0));
-        else if (Name.compare(21, 2, ".w", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        FTy->getParamType(2),
-                                                        FTy->getParamType(3),
-                                                        (Type*)0));
-        else if (Name.compare(21, 2, ".d", 2) == 0)
-          NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                        X86_MMXTy,
-                                                        FTy->getParamType(0),
-                                                        FTy->getParamType(1),
-                                                        (Type*)0));
-        return true;
-      }
-
-
-      if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
-        if (FTy->getReturnType() == X86_MMXTy &&
-            FTy->getParamType(0) == X86_MMXTy)
-          break;
-
-        F->setName("");
-        NewFn = cast<Function>(M->getOrInsertFunction(Name, 
-                                                      X86_MMXTy,
-                                                      X86_MMXTy,
-                                                      FTy->getParamType(1),
-                                                      (Type*)0));
-        return true;
-      }
-
-      if (Name.compare(13, 9, "emms", 4) == 0 ||
-          Name.compare(13, 9, "femms", 5) == 0) {
-        NewFn = 0;
-        break;
-      }
-
-      // We really shouldn't get here ever.
-      assert(0 && "Invalid MMX intrinsic!");
-      break;
-    } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
-               Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
-               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
-               Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
-               Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
-               Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
-               Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ||
-               Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 ||
-               Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) {
-      // Calls to these intrinsics are transformed into ShuffleVector's.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
-      // Calls to these intrinsics are transformed into vector multiplies.
-      NewFn = 0;
+    // Calls to these instructions are transformed into unaligned loads.
+    if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
+        Name == "x86.sse2.loadu.pd")
       return true;
-    } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 ||
-               Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) {
-      // Calls to these intrinsics are transformed into vector shuffles, shifts,
-      // or 0.
-      NewFn = 0;
-      return true;           
-    } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
-               Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
-      // Calls to these instructions are transformed into unaligned loads.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
-               Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
-      // Calls to these instructions are transformed into nontemporal stores.
-      NewFn = 0;
-      return true;
-    } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
-      // This is an SSE/MMX instruction.
-      const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-      NewFn =
-        cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
-                                              X86_MMXTy,
-                                              X86_MMXTy,
-                                              Type::getInt8Ty(F->getContext()),
-                                              (Type*)0));
+      
+    // Calls to these instructions are transformed into nontemporal stores.
+    if (Name == "x86.sse.movnt.ps"  || Name == "x86.sse2.movnt.dq" ||
+        Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
       return true;
-    }
 
     break;
   }
+  }
 
   //  This may not belong here. This function is effectively being overloaded 
   //  to both detect an intrinsic which needs upgrading, and to provide the 
@@ -601,105 +120,10 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
 }
 
 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
-  StringRef Name(GV->getName());
-
-  // We are only upgrading one symbol here.
-  if (Name == ".llvm.eh.catch.all.value") {
-    GV->setName("llvm.eh.catch.all.value");
-    return true;
-  }
-
+  // Nothing to do yet.
   return false;
 }
 
-/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
-/// have vector elements twice as big as one or both source operands, do the
-/// sign- or zero-extension that used to be handled by intrinsics.  The
-/// extended values are returned via V0 and V1.
-static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
-                           Value *&V0, Value *&V1) {
-  Function *F = CI->getCalledFunction();
-  const std::string& Name = F->getName();
-  bool isLong = (Name.at(18) == 'l');
-  bool isSigned = (Name.at(19) == 's');
-
-  if (isSigned) {
-    if (isLong)
-      V0 = new SExtInst(Arg0, CI->getType(), "", CI);
-    else
-      V0 = Arg0;
-    V1 = new SExtInst(Arg1, CI->getType(), "", CI);
-  } else {
-    if (isLong)
-      V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
-    else
-      V0 = Arg0;
-    V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
-  }
-}
-
-/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
-/// or vabal intrinsics, construct a call to a vabd intrinsic.  Examine the
-/// name of the old intrinsic to determine whether to use a signed or unsigned
-/// vabd intrinsic.  Get the type from the old call instruction, adjusted for
-/// half-size vector elements if the old intrinsic was vabdl or vabal.
-static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
-  Function *F = CI->getCalledFunction();
-  const std::string& Name = F->getName();
-  bool isLong = (Name.at(18) == 'l');
-  bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
-
-  Intrinsic::ID intID;
-  if (isSigned)
-    intID = Intrinsic::arm_neon_vabds;
-  else
-    intID = Intrinsic::arm_neon_vabdu;
-
-  const Type *Ty = CI->getType();
-  if (isLong)
-    Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
-
-  Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
-  Value *Operands[2];
-  Operands[0] = Arg0;
-  Operands[1] = Arg1;
-  return CallInst::Create(VABD, Operands, Operands+2, 
-                          "upgraded."+CI->getName(), CI);
-}
-
-/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
-static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
-                                 Value **Operands, unsigned NumOps,
-                                 bool AssignName = true) {
-  // Construct a new CallInst.
-  CallInst *NewCI =
-    CallInst::Create(NewFn, Operands, Operands + NumOps,
-                     AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
-
-  NewCI->setTailCall(OldCI->isTailCall());
-  NewCI->setCallingConv(OldCI->getCallingConv());
-
-  // Handle any uses of the old CallInst. If the type has changed, add a cast.
-  if (!OldCI->use_empty()) {
-    if (OldCI->getType() != NewCI->getType()) {
-      Function *OldFn = OldCI->getCalledFunction();
-      CastInst *RetCast =
-        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
-                                                 OldFn->getReturnType(), true),
-                         NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
-
-      // Replace all uses of the old call with the new cast which has the
-      // correct type.
-      OldCI->replaceAllUsesWith(RetCast);
-    } else {
-      OldCI->replaceAllUsesWith(NewCI);
-    }
-  }
-
-  // Clean up the old call now that it has been completely upgraded.
-  OldCI->eraseFromParent();
-}
-
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 
 // upgraded intrinsic. All argument and return casting must be provided in 
 // order to seamlessly integrate with existing context.
@@ -711,284 +135,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
-    // Get the Function's name.
-    const std::string& Name = F->getName();
-
-    // Upgrade ARM NEON intrinsics.
-    if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
-      Instruction *NewI;
-      Value *V0, *V1;
-      if (Name.compare(14, 7, "vmovls.", 7) == 0) {
-        NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
-                            "upgraded." + CI->getName(), CI);
-      } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
-        NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
-                            "upgraded." + CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vadd", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vsub", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
-      } else if (Name.compare(14, 4, "vmul", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
-        NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
-      } else if (Name.compare(14, 4, "vmla", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
-        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
-        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vmls", 4) == 0) {
-        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
-        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
-        NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vabd", 4) == 0) {
-        NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
-        NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 4, "vaba", 4) == 0) {
-        NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
-        if (Name.at(18) == 'l')
-          NewI = new ZExtInst(NewI, CI->getType(), "", CI);
-        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
-                                         "upgraded."+CI->getName(), CI);
-      } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
-        NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
-                             "upgraded." + CI->getName(), CI);
-      } else {
-        llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
-      }
-      // Replace any uses of the old CallInst.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(NewI);
-      CI->eraseFromParent();
-      return;
-    }
-
-    bool isLoadH = false, isLoadL = false, isMovL = false;
-    bool isMovSD = false, isShufPD = false;
-    bool isUnpckhPD = false, isUnpcklPD = false;
-    bool isPunpckhQPD = false, isPunpcklQPD = false;
-    if (F->getName() == "llvm.x86.sse2.loadh.pd")
-      isLoadH = true;
-    else if (F->getName() == "llvm.x86.sse2.loadl.pd")
-      isLoadL = true;
-    else if (F->getName() == "llvm.x86.sse2.movl.dq")
-      isMovL = true;
-    else if (F->getName() == "llvm.x86.sse2.movs.d")
-      isMovSD = true;
-    else if (F->getName() == "llvm.x86.sse2.shuf.pd")
-      isShufPD = true;
-    else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
-      isUnpckhPD = true;
-    else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
-      isUnpcklPD = true;
-    else if (F->getName() ==  "llvm.x86.sse2.punpckh.qdq")
-      isPunpckhQPD = true;
-    else if (F->getName() ==  "llvm.x86.sse2.punpckl.qdq")
-      isPunpcklQPD = true;
-
-    if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
-        isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-      std::vector<Constant*> Idxs;
-      Value *Op0 = CI->getArgOperand(0);
-      ShuffleVectorInst *SI = NULL;
-      if (isLoadH || isLoadL) {
-        Value *Op1 = UndefValue::get(Op0->getType());
-        Value *Addr = new BitCastInst(CI->getArgOperand(1), 
-                                  Type::getDoublePtrTy(C),
-                                      "upgraded.", CI);
-        Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
-        Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
-        Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
-
-        if (isLoadH) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        } else {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-        }
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else if (isMovL) {
-        Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Idxs.push_back(Zero);
-        Value *ZeroV = ConstantVector::get(Idxs);
-
-        Idxs.clear(); 
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
-      } else if (isMovSD ||
-                 isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-        Value *Op1 = CI->getArgOperand(1);
-        if (isMovSD) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-        } else if (isUnpckhPD || isPunpckhQPD) {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
-        } else {
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
-          Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
-        }
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else if (isShufPD) {
-        Value *Op1 = CI->getArgOperand(1);
-        unsigned MaskVal =
-                        cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
-        Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
-                                               ((MaskVal >> 1) & 1)+2));
-        Value *Mask = ConstantVector::get(Idxs);
-        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      }
-
-      assert(SI && "Unexpected!");
-
-      // Handle any uses of the old CallInst.
-      if (!CI->use_empty())
-        //  Replace all uses of the old call with the new cast which has the 
-        //  correct type.
-        CI->replaceAllUsesWith(SI);
-      
-      //  Clean up the old call now that it has been completely upgraded.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.sse41.pmulld") {
-      // Upgrade this set of intrinsics into vector multiplies.
-      Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
-                                                   CI->getArgOperand(1),
-                                                   CI->getName(),
-                                                   CI);
-      // Fix up all the uses with our new multiply.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Mul);
-        
-      // Remove upgraded multiply.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
-      Value *Op1 = CI->getArgOperand(0);
-      Value *Op2 = CI->getArgOperand(1);
-      Value *Op3 = CI->getArgOperand(2);
-      unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
-      Value *Rep;
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // If palignr is shifting the pair of input vectors less than 9 bytes,
-      // emit a shuffle instruction.
-      if (shiftVal <= 8) {
-        const Type *IntTy = Type::getInt32Ty(C);
-        const Type *EltTy = Type::getInt8Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 8);
-        
-        Op2 = Builder.CreateBitCast(Op2, VecTy);
-        Op1 = Builder.CreateBitCast(Op1, VecTy);
-
-        llvm::SmallVector<llvm::Constant*, 8> Indices;
-        for (unsigned i = 0; i != 8; ++i)
-          Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
-        Value *SV = ConstantVector::get(Indices);
-        Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-
-      // If palignr is shifting the pair of input vectors more than 8 but less
-      // than 16 bytes, emit a logical right shift of the destination.
-      else if (shiftVal < 16) {
-        // MMX has these as 1 x i64 vectors for some odd optimization reasons.
-        const Type *EltTy = Type::getInt64Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 1);
-
-        Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
-        Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
-        // create i32 constant
-        Function *I =
-          Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
-        Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
-      }
-
-      // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-      else {
-        Rep = Constant::getNullValue(F->getReturnType());
-      }
-      
-      // Replace any uses with our new instruction.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Rep);
-        
-      // Remove upgraded instruction.
-      CI->eraseFromParent();
-      
-    } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
-      Value *Op1 = CI->getArgOperand(0);
-      Value *Op2 = CI->getArgOperand(1);
-      Value *Op3 = CI->getArgOperand(2);
-      unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
-      Value *Rep;
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // If palignr is shifting the pair of input vectors less than 17 bytes,
-      // emit a shuffle instruction.
-      if (shiftVal <= 16) {
-        const Type *IntTy = Type::getInt32Ty(C);
-        const Type *EltTy = Type::getInt8Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 16);
-        
-        Op2 = Builder.CreateBitCast(Op2, VecTy);
-        Op1 = Builder.CreateBitCast(Op1, VecTy);
-
-        llvm::SmallVector<llvm::Constant*, 16> Indices;
-        for (unsigned i = 0; i != 16; ++i)
-          Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
-        Value *SV = ConstantVector::get(Indices);
-        Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
-        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
-      }
-
-      // If palignr is shifting the pair of input vectors more than 16 but less
-      // than 32 bytes, emit a logical right shift of the destination.
-      else if (shiftVal < 32) {
-        const Type *EltTy = Type::getInt64Ty(C);
-        const Type *VecTy = VectorType::get(EltTy, 2);
-        const Type *IntTy = Type::getInt32Ty(C);
-
-        Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
-        Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
-
-        // create i32 constant
-        Function *I =
-          Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
-        Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
-      }
-
-      // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-      else {
-        Rep = Constant::getNullValue(F->getReturnType());
-      }
-      
-      // Replace any uses with our new instruction.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Rep);
-        
-      // Remove upgraded instruction.
-      CI->eraseFromParent();
-    
-    } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
-               F->getName() == "llvm.x86.sse2.loadu.dq" ||
-               F->getName() == "llvm.x86.sse2.loadu.pd") {
+    if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+        F->getName() == "llvm.x86.sse2.loadu.dq" ||
+        F->getName() == "llvm.x86.sse2.loadu.pd") {
       // Convert to a native, unaligned load.
       const Type *VecTy = CI->getType();
       const Type *IntTy = IntegerType::get(C, 128);
@@ -1040,306 +189,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   }
 
   switch (NewFn->getIntrinsicID()) {
-  default: llvm_unreachable("Unknown function for CallInst upgrade.");
-  case Intrinsic::arm_neon_vld1:
-  case Intrinsic::arm_neon_vld2:
-  case Intrinsic::arm_neon_vld3:
-  case Intrinsic::arm_neon_vld4:
-  case Intrinsic::arm_neon_vst1:
-  case Intrinsic::arm_neon_vst2:
-  case Intrinsic::arm_neon_vst3:
-  case Intrinsic::arm_neon_vst4:
-  case Intrinsic::arm_neon_vld2lane:
-  case Intrinsic::arm_neon_vld3lane:
-  case Intrinsic::arm_neon_vld4lane:
-  case Intrinsic::arm_neon_vst2lane:
-  case Intrinsic::arm_neon_vst3lane:
-  case Intrinsic::arm_neon_vst4lane: {
-    // Add a default alignment argument of 1.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-    Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-    
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-    break;
-  }        
-
-  case Intrinsic::x86_mmx_padd_b:
-  case Intrinsic::x86_mmx_padd_w:
-  case Intrinsic::x86_mmx_padd_d:
-  case Intrinsic::x86_mmx_padd_q:
-  case Intrinsic::x86_mmx_padds_b:
-  case Intrinsic::x86_mmx_padds_w:
-  case Intrinsic::x86_mmx_paddus_b:
-  case Intrinsic::x86_mmx_paddus_w:
-  case Intrinsic::x86_mmx_psub_b:
-  case Intrinsic::x86_mmx_psub_w:
-  case Intrinsic::x86_mmx_psub_d:
-  case Intrinsic::x86_mmx_psub_q:
-  case Intrinsic::x86_mmx_psubs_b:
-  case Intrinsic::x86_mmx_psubs_w:
-  case Intrinsic::x86_mmx_psubus_b:
-  case Intrinsic::x86_mmx_psubus_w:
-  case Intrinsic::x86_mmx_pmulh_w:
-  case Intrinsic::x86_mmx_pmull_w:
-  case Intrinsic::x86_mmx_pmulhu_w:
-  case Intrinsic::x86_mmx_pmulu_dq:
-  case Intrinsic::x86_mmx_pmadd_wd:
-  case Intrinsic::x86_mmx_pand:
-  case Intrinsic::x86_mmx_pandn:
-  case Intrinsic::x86_mmx_por:
-  case Intrinsic::x86_mmx_pxor:
-  case Intrinsic::x86_mmx_pavg_b:
-  case Intrinsic::x86_mmx_pavg_w:
-  case Intrinsic::x86_mmx_pmaxu_b:
-  case Intrinsic::x86_mmx_pmaxs_w:
-  case Intrinsic::x86_mmx_pminu_b:
-  case Intrinsic::x86_mmx_pmins_w:
-  case Intrinsic::x86_mmx_psad_bw:
-  case Intrinsic::x86_mmx_psll_w:
-  case Intrinsic::x86_mmx_psll_d:
-  case Intrinsic::x86_mmx_psll_q:
-  case Intrinsic::x86_mmx_pslli_w:
-  case Intrinsic::x86_mmx_pslli_d:
-  case Intrinsic::x86_mmx_pslli_q:
-  case Intrinsic::x86_mmx_psrl_w:
-  case Intrinsic::x86_mmx_psrl_d:
-  case Intrinsic::x86_mmx_psrl_q:
-  case Intrinsic::x86_mmx_psrli_w:
-  case Intrinsic::x86_mmx_psrli_d:
-  case Intrinsic::x86_mmx_psrli_q:
-  case Intrinsic::x86_mmx_psra_w:
-  case Intrinsic::x86_mmx_psra_d:
-  case Intrinsic::x86_mmx_psrai_w:
-  case Intrinsic::x86_mmx_psrai_d:
-  case Intrinsic::x86_mmx_packsswb:
-  case Intrinsic::x86_mmx_packssdw:
-  case Intrinsic::x86_mmx_packuswb:
-  case Intrinsic::x86_mmx_punpckhbw:
-  case Intrinsic::x86_mmx_punpckhwd:
-  case Intrinsic::x86_mmx_punpckhdq:
-  case Intrinsic::x86_mmx_punpcklbw:
-  case Intrinsic::x86_mmx_punpcklwd:
-  case Intrinsic::x86_mmx_punpckldq:
-  case Intrinsic::x86_mmx_pcmpeq_b:
-  case Intrinsic::x86_mmx_pcmpeq_w:
-  case Intrinsic::x86_mmx_pcmpeq_d:
-  case Intrinsic::x86_mmx_pcmpgt_b:
-  case Intrinsic::x86_mmx_pcmpgt_w:
-  case Intrinsic::x86_mmx_pcmpgt_d: {
-    Value *Operands[2];
-    
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    switch (NewFn->getIntrinsicID()) {
-    default:
-      // Cast to the X86 MMX type.
-      Operands[1] = new BitCastInst(CI->getArgOperand(1), 
-                                    NewFn->getFunctionType()->getParamType(1),
-                                    "upgraded.", CI);
-      break;
-    case Intrinsic::x86_mmx_pslli_w:
-    case Intrinsic::x86_mmx_pslli_d:
-    case Intrinsic::x86_mmx_pslli_q:
-    case Intrinsic::x86_mmx_psrli_w:
-    case Intrinsic::x86_mmx_psrli_d:
-    case Intrinsic::x86_mmx_psrli_q:
-    case Intrinsic::x86_mmx_psrai_w:
-    case Intrinsic::x86_mmx_psrai_d:
-      // These take an i32 as their second parameter.
-      Operands[1] = CI->getArgOperand(1);
-      break;
-    }
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-  case Intrinsic::x86_mmx_maskmovq: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = new BitCastInst(CI->getArgOperand(1), 
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3, false);
-    break;
-  }
-  case Intrinsic::x86_mmx_pmovmskb: {
-    Value *Operands[1];
-
-    // Cast the operand to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0), 
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 1);
-    break;
-  }
-  case Intrinsic::x86_mmx_movnt_dq: {
-    Value *Operands[2];
-
-    Operands[0] = CI->getArgOperand(0);
-
-    // Cast the operand to the X86 MMX type.
-    Operands[1] = new BitCastInst(CI->getArgOperand(1),
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2, false);
-    break;
-  }
-  case Intrinsic::x86_mmx_palignr_b: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = new BitCastInst(CI->getArgOperand(1),
-                                  NewFn->getFunctionType()->getParamType(1),
-                                  "upgraded.", CI);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3);
-    break;
-  }
-  case Intrinsic::x86_mmx_pextr_w: {
-    Value *Operands[2];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-  case Intrinsic::x86_mmx_pinsr_w: {
-    Value *Operands[3];
-
-    // Cast the operands to the X86 MMX type.
-    Operands[0] = new BitCastInst(CI->getArgOperand(0),
-                                  NewFn->getFunctionType()->getParamType(0),
-                                  "upgraded.", CI);
-    Operands[1] = CI->getArgOperand(1);
-    Operands[2] = CI->getArgOperand(2);
-
-    ConstructNewCallInst(NewFn, CI, Operands, 3);
-    break;
-  }
-  case Intrinsic::x86_sse_pshuf_w: {
+  case Intrinsic::prefetch: {
     IRBuilder<> Builder(C);
     Builder.SetInsertPoint(CI->getParent(), CI);
+    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
 
-    // Cast the operand to the X86 MMX type.
-    Value *Operands[2];
-    Operands[0] =
-      Builder.CreateBitCast(CI->getArgOperand(0), 
-                            NewFn->getFunctionType()->getParamType(0),
-                            "upgraded.");
-    Operands[1] =
-      Builder.CreateTrunc(CI->getArgOperand(1),
-                          Type::getInt8Ty(C),
-                          "upgraded.");
-
-    ConstructNewCallInst(NewFn, CI, Operands, 2);
-    break;
-  }
-
-  case Intrinsic::ctlz:
-  case Intrinsic::ctpop:
-  case Intrinsic::cttz: {
-    //  Build a small vector of the original arguments.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-
-    //  Construct a new CallInst
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       "upgraded."+CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty()) {
-      //  Check for sign extend parameter attributes on the return values.
-      bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt);
-      bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt);
-      
-      //  Construct an appropriate cast from the new return type to the old.
-      CastInst *RetCast = CastInst::Create(
-                            CastInst::getCastOpcode(NewCI, SrcSExt,
-                                                    F->getReturnType(),
-                                                    DestSExt),
-                            NewCI, F->getReturnType(),
-                            NewCI->getName(), CI);
-      NewCI->moveBefore(RetCast);
-
-      //  Replace all uses of the old call with the new cast which has the 
-      //  correct type.
-      CI->replaceAllUsesWith(RetCast);
-    }
-
-    //  Clean up the old call now that it has been completely upgraded.
-    CI->eraseFromParent();
-  }
-  break;
-  case Intrinsic::eh_selector:
-  case Intrinsic::eh_typeid_for: {
-    // Only the return type changed.
-    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
-                                       "upgraded." + CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty()) {
-      //  Construct an appropriate cast from the new return type to the old.
-      CastInst *RetCast =
-        CastInst::Create(CastInst::getCastOpcode(NewCI, true,
-                                                 F->getReturnType(), true),
-                         NewCI, F->getReturnType(), NewCI->getName(), CI);
-      CI->replaceAllUsesWith(RetCast);
-    }
-    CI->eraseFromParent();
-  }
-  break;
-  case Intrinsic::memcpy:
-  case Intrinsic::memmove:
-  case Intrinsic::memset: {
-    // Add isVolatile
-    const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
-    Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
-                           CI->getArgOperand(2), CI->getArgOperand(3),
-                           llvm::ConstantInt::get(I1Ty, 0) };
-    CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
+    // Add the extra "data cache" argument
+    Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
+                           CI->getArgOperand(2),
+                           llvm::ConstantInt::get(I32Ty, 1) };
+    CallInst *NewCI = CallInst::Create(NewFn, Operands,
                                        CI->getName(), CI);
     NewCI->setTailCall(CI->isTailCall());
     NewCI->setCallingConv(CI->getCallingConv());
     //  Handle any uses of the old CallInst.
     if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the 
+      //  Replace all uses of the old call with the new cast which has the
       //  correct type.
       CI->replaceAllUsesWith(NewCI);
-    
+
     //  Clean up the old call now that it has been completely upgraded.
     CI->eraseFromParent();
     break;
@@ -1354,13 +222,13 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
 
   // Upgrade the function and check if it is a totaly new function.
-  Function* NewFn;
+  Function *NewFn;
   if (UpgradeIntrinsicFunction(F, NewFn)) {
     if (NewFn != F) {
       // Replace all uses to the old function with the new one if necessary.
       for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
            UI != UE; ) {
-        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+        if (CallInst *CI = dyn_cast<CallInst>(*UI++))
           UpgradeIntrinsicCall(CI, NewFn);
       }
       // Remove old function, no longer used, from the module.
@@ -1373,37 +241,27 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
 /// If an llvm.dbg.declare intrinsic is invalid, then this function simply
 /// strips that use.
 void llvm::CheckDebugInfoIntrinsics(Module *M) {
-
-
   if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
-    while (!FuncStart->use_empty()) {
-      CallInst *CI = cast<CallInst>(FuncStart->use_back());
-      CI->eraseFromParent();
-    }
+    while (!FuncStart->use_empty())
+      cast<CallInst>(FuncStart->use_back())->eraseFromParent();
     FuncStart->eraseFromParent();
   }
   
   if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
-    while (!StopPoint->use_empty()) {
-      CallInst *CI = cast<CallInst>(StopPoint->use_back());
-      CI->eraseFromParent();
-    }
+    while (!StopPoint->use_empty())
+      cast<CallInst>(StopPoint->use_back())->eraseFromParent();
     StopPoint->eraseFromParent();
   }
 
   if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
-    while (!RegionStart->use_empty()) {
-      CallInst *CI = cast<CallInst>(RegionStart->use_back());
-      CI->eraseFromParent();
-    }
+    while (!RegionStart->use_empty())
+      cast<CallInst>(RegionStart->use_back())->eraseFromParent();
     RegionStart->eraseFromParent();
   }
 
   if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
-    while (!RegionEnd->use_empty()) {
-      CallInst *CI = cast<CallInst>(RegionEnd->use_back());
-      CI->eraseFromParent();
-    }
+    while (!RegionEnd->use_empty())
+      cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
     RegionEnd->eraseFromParent();
   }
   
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 955a0285b260..70265c899d7e 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -147,6 +147,26 @@ Instruction* BasicBlock::getFirstNonPHIOrDbg() {
   return &*i;
 }
 
+Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
+  // Step over PHI nodes, debug-info intrinsics and the lifetime_start /
+  // lifetime_end intrinsics. A valid block ends in a terminator, which is
+  // none of those, so the scan stops inside the block; on an invalid block
+  // we get an assertion failure dereferencing a past-the-end iterator.
+  BasicBlock::iterator It = begin();
+  while (true) {
+    bool Skip = isa<PHINode>(It) || isa<DbgInfoIntrinsic>(It);
+    if (!Skip)
+      // Any other intrinsic, or a non-intrinsic instruction, ends the scan.
+      if (const IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(It))
+        Skip = Intr->getIntrinsicID() == Intrinsic::lifetime_start ||
+               Intr->getIntrinsicID() == Intrinsic::lifetime_end;
+    if (!Skip)
+      break;
+    ++It;
+  }
+  return &*It;
+}
+
 void BasicBlock::dropAllReferences() {
   for(iterator I = begin(), E = end(); I != E; ++I)
     I->dropAllReferences();
@@ -227,8 +247,8 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
 
       // If the PHI _HAD_ two uses, replace PHI node with its now *single* value
       if (max_idx == 2) {
-        if (PN->getOperand(0) != PN)
-          PN->replaceAllUsesWith(PN->getOperand(0));
+        if (PN->getIncomingValue(0) != PN)
+          PN->replaceAllUsesWith(PN->getIncomingValue(0));
         else
           // We are left with an infinite loop with no entries: kill the PHI.
           PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
@@ -308,3 +328,19 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
   return New;
 }
 
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+  TerminatorInst *TI = getTerminator();
+  if (!TI || New == this)
+    // No terminator yet (Clang's CodeGenFunction::EmitReturnBlock() calls us
+    // that way), or New is this block and the loop below would never finish.
+    return;
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+    BasicBlock *Succ = TI->getSuccessor(i);
+    for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II);
+         ++II) {
+      int Idx; // Not `i`: that would shadow the successor counter above.
+      while ((Idx = PN->getBasicBlockIndex(this)) >= 0)
+        PN->setIncomingBlock(Idx, New);
+    }
+  }
+}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 6bde263ce625..f60dd06c98a6 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -29,7 +29,6 @@ add_llvm_library(LLVMCore
   PassRegistry.cpp
   PrintModulePass.cpp
   Type.cpp
-  TypeSymbolTable.cpp
   Use.cpp
   User.cpp
   Value.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 9985adaf576e..323e2a280999 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -559,7 +559,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
       for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
         res.push_back(ConstantExpr::getCast(opc,
                                             CV->getOperand(i), DstEltTy));
-      return ConstantVector::get(DestVecTy, res);
+      return ConstantVector::get(res);
     }
 
   // We actually have to do a cast now. Perform the cast according to the
@@ -730,9 +730,12 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
   }
 
 
+  if (isa<UndefValue>(Cond)) {
+    if (isa<UndefValue>(V1)) return V1;
+    return V2;
+  }
   if (isa<UndefValue>(V1)) return V2;
   if (isa<UndefValue>(V2)) return V1;
-  if (isa<UndefValue>(Cond)) return V1;
   if (V1 == V2) return V1;
 
   if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
@@ -877,42 +880,38 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
 }
 
 Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
-                                                    const unsigned *Idxs,
-                                                    unsigned NumIdx) {
+                                                    ArrayRef<unsigned> Idxs) {
   // Base case: no indices, so return the entire value.
-  if (NumIdx == 0)
+  if (Idxs.empty())
     return Agg;
 
   if (isa<UndefValue>(Agg))  // ev(undef, x) -> undef
     return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
-                                                            Idxs,
-                                                            Idxs + NumIdx));
+                                                            Idxs));
 
   if (isa<ConstantAggregateZero>(Agg))  // ev(0, x) -> 0
     return
       Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
-                                                              Idxs,
-                                                              Idxs + NumIdx));
+                                                              Idxs));
 
   // Otherwise recurse.
   if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
-    return ConstantFoldExtractValueInstruction(CS->getOperand(*Idxs),
-                                               Idxs+1, NumIdx-1);
+    return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]),
+                                               Idxs.slice(1));
 
   if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
-    return ConstantFoldExtractValueInstruction(CA->getOperand(*Idxs),
-                                               Idxs+1, NumIdx-1);
+    return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]),
+                                               Idxs.slice(1));
   ConstantVector *CV = cast<ConstantVector>(Agg);
-  return ConstantFoldExtractValueInstruction(CV->getOperand(*Idxs),
-                                             Idxs+1, NumIdx-1);
+  return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]),
+                                             Idxs.slice(1));
 }
 
 Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
                                                    Constant *Val,
-                                                   const unsigned *Idxs,
-                                                   unsigned NumIdx) {
+                                                   ArrayRef<unsigned> Idxs) {
   // Base case: no indices, so replace the entire value.
-  if (NumIdx == 0)
+  if (Idxs.empty())
     return Val;
 
   if (isa<UndefValue>(Agg)) {
@@ -934,15 +933,15 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     for (unsigned i = 0; i < numOps; ++i) {
       const Type *MemberTy = AggTy->getTypeAtIndex(i);
       Constant *Op =
-        (*Idxs == i) ?
+        (Idxs[0] == i) ?
         ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
-                                           Val, Idxs+1, NumIdx-1) :
+                                           Val, Idxs.slice(1)) :
         UndefValue::get(MemberTy);
       Ops[i] = Op;
     }
     
     if (const StructType* ST = dyn_cast<StructType>(AggTy))
-      return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+      return ConstantStruct::get(ST, Ops);
     return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
   }
   
@@ -965,15 +964,15 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     for (unsigned i = 0; i < numOps; ++i) {
       const Type *MemberTy = AggTy->getTypeAtIndex(i);
       Constant *Op =
-        (*Idxs == i) ?
+        (Idxs[0] == i) ?
         ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
-                                           Val, Idxs+1, NumIdx-1) :
+                                           Val, Idxs.slice(1)) :
         Constant::getNullValue(MemberTy);
       Ops[i] = Op;
     }
     
     if (const StructType *ST = dyn_cast<StructType>(AggTy))
-      return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+      return ConstantStruct::get(ST, Ops);
     return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
   }
   
@@ -982,13 +981,13 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     std::vector<Constant*> Ops(Agg->getNumOperands());
     for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
       Constant *Op = cast<Constant>(Agg->getOperand(i));
-      if (*Idxs == i)
-        Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs+1, NumIdx-1);
+      if (Idxs[0] == i)
+        Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1));
       Ops[i] = Op;
     }
     
     if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
-      return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+      return ConstantStruct::get(ST, Ops);
     return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
   }
 
@@ -1014,20 +1013,38 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
     case Instruction::Add:
     case Instruction::Sub:
       return UndefValue::get(C1->getType());
-    case Instruction::Mul:
     case Instruction::And:
+      if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
+        return C1;
+      return Constant::getNullValue(C1->getType());   // undef & X -> 0
+    case Instruction::Mul: {
+      ConstantInt *CI;
+      // X * undef -> undef   if X is odd or undef
+      if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
+          ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
+          (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+        return UndefValue::get(C1->getType());
+
+      // X * undef -> 0       otherwise
       return Constant::getNullValue(C1->getType());
+    }
     case Instruction::UDiv:
     case Instruction::SDiv:
+      // undef / 1 -> undef
+      if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
+        if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
+          if (CI2->isOne())
+            return C1;
+      // FALL THROUGH
     case Instruction::URem:
     case Instruction::SRem:
       if (!isa<UndefValue>(C2))                    // undef / X -> 0
         return Constant::getNullValue(C1->getType());
       return C2;                                   // X / undef -> undef
     case Instruction::Or:                          // X | undef -> -1
-      if (const VectorType *PTy = dyn_cast<VectorType>(C1->getType()))
-        return Constant::getAllOnesValue(PTy);
-      return Constant::getAllOnesValue(C1->getType());
+      if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
+        return C1;
+      return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
     case Instruction::LShr:
       if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
         return C1;                                  // undef lshr undef -> undef
@@ -1041,6 +1058,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
       else
         return C1;                                  // X ashr undef --> X
     case Instruction::Shl:
+      if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+        return C1;                                  // undef shl undef -> undef
       // undef << X -> 0   or   X << undef -> 0
       return Constant::getNullValue(C1->getType());
     }
@@ -1443,8 +1462,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
 /// isZeroSizedType - This type is zero sized if its an array or structure of
 /// zero sized types.  The only leaf zero sized type is an empty structure.
 static bool isMaybeZeroSizedType(const Type *Ty) {
-  if (Ty->isOpaqueTy()) return true;  // Can't say.
   if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    if (STy->isOpaque()) return true;  // Can't say.
 
     // If all of elements have zero size, this does too.
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
@@ -1831,7 +1850,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
   if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
     // For EQ and NE, we can always pick a value for the undef to make the
     // predicate pass or fail, so we can return undef.
-    if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+    // Also, if both operands are undef, we can return undef.
+    if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
+        (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
       return UndefValue::get(ResultTy);
     // Otherwise, pick the same value as the non-undef operand, and fold
     // it to true or false.
@@ -2147,9 +2168,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
                                                bool inBounds,
                                                IndexTy const *Idxs,
                                                unsigned NumIdx) {
+  if (NumIdx == 0) return C;
   Constant *Idx0 = cast<Constant>(Idxs[0]);
-  if (NumIdx == 0 ||
-      (NumIdx == 1 && Idx0->isNullValue()))
+  if ((NumIdx == 1 && Idx0->isNullValue()))
     return C;
 
   if (isa<UndefValue>(C)) {
diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h
index 0ecd7b49a48e..653a1c3f377d 100644
--- a/lib/VMCore/ConstantFold.h
+++ b/lib/VMCore/ConstantFold.h
@@ -19,6 +19,8 @@
 #ifndef CONSTANTFOLDING_H
 #define CONSTANTFOLDING_H
 
+#include "llvm/ADT/ArrayRef.h"
+
 namespace llvm {
   class Value;
   class Constant;
@@ -38,11 +40,9 @@ namespace llvm {
   Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
                                                  Constant *Mask);
   Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
-                                                const unsigned *Idxs,
-                                                unsigned NumIdx);
+                                                ArrayRef<unsigned> Idxs);
   Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
-                                               const unsigned *Idxs,
-                                               unsigned NumIdx);
+                                               ArrayRef<unsigned> Idxs);
   Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
                                           Constant *V2);
   Constant *ConstantFoldCompareInstruction(unsigned short predicate, 
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 15d7793d5893..316c8846f94f 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 #include <cstdarg>
 using namespace llvm;
@@ -39,6 +40,28 @@ using namespace llvm;
 //                              Constant Class
 //===----------------------------------------------------------------------===//
 
+/// Return true for floating point -0.0; other constants use isNullValue().
+bool Constant::isNegativeZeroValue() const {
+  const ConstantFP *FPVal = dyn_cast<ConstantFP>(this);
+  if (!FPVal)
+    // Not a ConstantFP, so there is no explicit -0.0: use the null check.
+    return isNullValue();
+  return FPVal->isZero() && FPVal->isNegative();
+}
+
+/// Return true if this constant is integer 0, floating point +0.0, a
+/// ConstantAggregateZero, or a ConstantPointerNull.
+bool Constant::isNullValue() const {
+  // Integers are null exactly when they are zero.
+  if (const ConstantInt *IntC = dyn_cast<ConstantInt>(this))
+    return IntC->isZero();
+  // Floating point: only +0.0 counts; -0.0 is excluded.
+  if (const ConstantFP *FPC = dyn_cast<ConstantFP>(this))
+    return !FPC->isNegative() && FPC->isZero();
+  // Aggregates use ConstantAggregateZero, pointers use ConstantPointerNull.
+  return isa<ConstantPointerNull>(this) || isa<ConstantAggregateZero>(this);
+}
+
 // Constructor to create a '0' constant of arbitrary type...
 Constant *Constant::getNullValue(const Type *Ty) {
   switch (Ty->getTypeID()) {
@@ -541,11 +564,7 @@ ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
          "FP type Mismatch");
 }
 
-bool ConstantFP::isNullValue() const {
-  return Val.isZero() && !Val.isNegative();
-}
-
-bool ConstantFP::isExactlyValue(const APFloat& V) const {
+bool ConstantFP::isExactlyValue(const APFloat &V) const {
   return Val.bitwiseIsEqual(V);
 }
 
@@ -571,8 +590,7 @@ ConstantArray::ConstantArray(const ArrayType *T,
   }
 }
 
-Constant *ConstantArray::get(const ArrayType *Ty, 
-                             const std::vector<Constant*> &V) {
+Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) {
   for (unsigned i = 0, e = V.size(); i != e; ++i) {
     assert(V[i]->getType() == Ty->getElementType() &&
            "Wrong type in array element initializer");
@@ -592,13 +610,6 @@ Constant *ConstantArray::get(const ArrayType *Ty,
   return ConstantAggregateZero::get(Ty);
 }
 
-
-Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals,
-                             unsigned NumVals) {
-  // FIXME: make this the primary ctor method.
-  return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
-}
-
 /// ConstantArray::get(const string&) - Return an array that is initialized to
 /// contain the specified string.  If length is zero then a null terminator is 
 /// added to the specified string so that it may be used in a natural way. 
@@ -621,63 +632,64 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
   return get(ATy, ElementVals);
 }
 
+/// getTypeForElements - Return the anonymous struct type whose element types
+/// are those of the constants in V, in order.  The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+                                               ArrayRef<Constant*> V,
+                                               bool Packed) {
+  SmallVector<Type*, 16> FieldTys;
+  for (unsigned Idx = 0; Idx != V.size(); ++Idx)
+    FieldTys.push_back(V[Idx]->getType());
+
+  return StructType::get(Context, FieldTys, Packed);
+}
+
+/// Overload without a context: asserts V is non-empty, uses V[0]'s context.
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+                                               bool Packed) {
+  assert(!V.empty() &&
+         "ConstantStruct::getTypeForElements cannot be called on empty list");
+  return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
 ConstantStruct::ConstantStruct(const StructType *T,
                                const std::vector<Constant*> &V)
   : Constant(T, ConstantStructVal,
              OperandTraits<ConstantStruct>::op_end(this) - V.size(),
              V.size()) {
-  assert(V.size() == T->getNumElements() &&
+  assert((T->isOpaque() || V.size() == T->getNumElements()) &&
          "Invalid initializer vector for constant structure");
   Use *OL = OperandList;
   for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
        I != E; ++I, ++OL) {
     Constant *C = *I;
-    assert(C->getType() == T->getElementType(I-V.begin()) &&
+    assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) &&
            "Initializer for struct element doesn't match struct element type!");
     *OL = C;
   }
 }
 
 // ConstantStruct accessors.
-Constant *ConstantStruct::get(const StructType* T,
-                              const std::vector<Constant*>& V) {
-  LLVMContextImpl* pImpl = T->getContext().pImpl;
-  
-  // Create a ConstantAggregateZero value if all elements are zeros...
+Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
+  // Create a ConstantAggregateZero value if all elements are zeros.
   for (unsigned i = 0, e = V.size(); i != e; ++i)
     if (!V[i]->isNullValue())
-      return pImpl->StructConstants.getOrCreate(T, V);
-
-  return ConstantAggregateZero::get(T);
-}
-
-Constant *ConstantStruct::get(LLVMContext &Context,
-                              const std::vector<Constant*>& V, bool packed) {
-  std::vector<const Type*> StructEls;
-  StructEls.reserve(V.size());
-  for (unsigned i = 0, e = V.size(); i != e; ++i)
-    StructEls.push_back(V[i]->getType());
-  return get(StructType::get(Context, StructEls, packed), V);
-}
+      return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
 
-Constant *ConstantStruct::get(LLVMContext &Context,
-                              Constant *const *Vals, unsigned NumVals,
-                              bool Packed) {
-  // FIXME: make this the primary ctor method.
-  return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
+  assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
+         "Incorrect # elements specified to ConstantStruct::get");
+  return ConstantAggregateZero::get(ST);
 }
 
-Constant* ConstantStruct::get(LLVMContext &Context, bool Packed,
-                              Constant * Val, ...) {
+Constant* ConstantStruct::get(const StructType *T, ...) {
   va_list ap;
-  std::vector<Constant*> Values;
-  va_start(ap, Val);
-  while (Val) {
+  SmallVector<Constant*, 8> Values;
+  va_start(ap, T);
+  while (Constant *Val = va_arg(ap, llvm::Constant*))
     Values.push_back(Val);
-    Val = va_arg(ap, llvm::Constant*);
-  }
   va_end(ap);
-  return get(Context, Values, Packed);
+  return get(T, Values);
 }
 
 ConstantVector::ConstantVector(const VectorType *T,
@@ -696,9 +708,9 @@ ConstantVector::ConstantVector(const VectorType *T,
 }
 
 // ConstantVector accessors.
-Constant *ConstantVector::get(const VectorType *T,
-                              const std::vector<Constant*> &V) {
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
   assert(!V.empty() && "Vectors can't be empty");
+  const VectorType *T = VectorType::get(V.front()->getType(), V.size());
   LLVMContextImpl *pImpl = T->getContext().pImpl;
 
   // If this is an all-undef or all-zero vector, return a
@@ -723,12 +735,6 @@ Constant *ConstantVector::get(const VectorType *T,
   return pImpl->VectorConstants.getOrCreate(T, V);
 }
 
-Constant *ConstantVector::get(ArrayRef<Constant*> V) {
-  // FIXME: make this the primary ctor method.
-  assert(!V.empty() && "Vectors cannot be empty");
-  return get(VectorType::get(V.front()->getType(), V.size()), V.vec());
-}
-
 // Utility function for determining if a ConstantExpr is a CastOp or not. This
 // can't be inline because we don't want to #include Instruction.h into
 // Constant.h
@@ -779,8 +785,7 @@ ArrayRef<unsigned> ConstantExpr::getIndices() const {
 }
 
 unsigned ConstantExpr::getPredicate() const {
-  assert(getOpcode() == Instruction::FCmp || 
-         getOpcode() == Instruction::ICmp);
+  assert(isCompare());
   return ((const CompareConstantExpr*)this)->predicate;
 }
 
@@ -851,17 +856,15 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
 }
 
 /// getWithOperands - This returns the current constant expression with the
-/// operands replaced with the specified values.  The specified operands must
-/// match count and type with the existing ones.
+/// operands replaced with the specified values.  The specified array must
+/// have the same number of operands as our current one.
 Constant *ConstantExpr::
-getWithOperands(ArrayRef<Constant*> Ops) const {
+getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
   assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
-  bool AnyChange = false;
-  for (unsigned i = 0; i != Ops.size(); ++i) {
-    assert(Ops[i]->getType() == getOperand(i)->getType() &&
-           "Operand type mismatch!");
+  bool AnyChange = Ty != getType();
+  for (unsigned i = 0; i != Ops.size(); ++i)
     AnyChange |= Ops[i] != getOperand(i);
-  }
+  
   if (!AnyChange)  // No operands changed, return self.
     return const_cast<ConstantExpr*>(this);
 
@@ -878,7 +881,7 @@ getWithOperands(ArrayRef<Constant*> Ops) const {
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
   case Instruction::BitCast:
-    return ConstantExpr::getCast(getOpcode(), Ops[0], getType());
+    return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
   case Instruction::Select:
     return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
   case Instruction::InsertElement:
@@ -976,14 +979,14 @@ ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
 /// destroyConstant - Remove the constant from the constant table...
 ///
 void ConstantAggregateZero::destroyConstant() {
-  getRawType()->getContext().pImpl->AggZeroConstants.remove(this);
+  getType()->getContext().pImpl->AggZeroConstants.remove(this);
   destroyConstantImpl();
 }
 
 /// destroyConstant - Remove the constant from the constant table...
 ///
 void ConstantArray::destroyConstant() {
-  getRawType()->getContext().pImpl->ArrayConstants.remove(this);
+  getType()->getContext().pImpl->ArrayConstants.remove(this);
   destroyConstantImpl();
 }
 
@@ -1023,44 +1026,54 @@ bool ConstantArray::isCString() const {
 }
 
 
-/// getAsString - If the sub-element type of this array is i8
-/// then this method converts the array to an std::string and returns it.
-/// Otherwise, it asserts out.
+/// convertToString - Return the first len operands of U as a string of chars.
+static std::string convertToString(const User *U, unsigned len) {
+  std::string Chars;
+  Chars.reserve(len);
+  for (unsigned Op = 0; Op < len; ++Op)
+    Chars += (char)cast<ConstantInt>(U->getOperand(Op))->getZExtValue();
+  return Chars;
+}
+
+/// getAsString - If this array is isString(), then this method converts the
+/// array to an std::string and returns it.  Otherwise, it asserts out.
 ///
 std::string ConstantArray::getAsString() const {
   assert(isString() && "Not a string!");
-  std::string Result;
-  Result.reserve(getNumOperands());
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    Result.push_back((char)cast<ConstantInt>(getOperand(i))->getZExtValue());
-  return Result;
+  return convertToString(this, getNumOperands());
 }
 
 
-//---- ConstantStruct::get() implementation...
-//
+/// getAsCString - For an isCString() array, return its contents as an
+/// std::string with the trailing null byte dropped.  Asserts otherwise.
+std::string ConstantArray::getAsCString() const {
+  assert(isCString() && "Not a string!");
+  // Skip the final operand (the trailing null byte).
+  const unsigned NumChars = getNumOperands() - 1;
+  return convertToString(this, NumChars);
+}
 
-namespace llvm {
 
-}
+//---- ConstantStruct::get() implementation...
+//
 
 // destroyConstant - Remove the constant from the constant table...
 //
 void ConstantStruct::destroyConstant() {
-  getRawType()->getContext().pImpl->StructConstants.remove(this);
+  getType()->getContext().pImpl->StructConstants.remove(this);
   destroyConstantImpl();
 }
 
 // destroyConstant - Remove the constant from the constant table...
 //
 void ConstantVector::destroyConstant() {
-  getRawType()->getContext().pImpl->VectorConstants.remove(this);
+  getType()->getContext().pImpl->VectorConstants.remove(this);
   destroyConstantImpl();
 }
 
 /// This function will return true iff every element in this vector constant
 /// is set to all ones.
-/// @returns true iff this constant's emements are all set to all ones.
+/// @returns true iff this constant's elements are all set to all ones.
 /// @brief Determine if the value is all ones.
 bool ConstantVector::isAllOnesValue() const {
   // Check out first element.
@@ -1068,9 +1081,10 @@ bool ConstantVector::isAllOnesValue() const {
   const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
   if (!CI || !CI->isAllOnesValue()) return false;
   // Then make sure all remaining elements point to the same value.
-  for (unsigned I = 1, E = getNumOperands(); I < E; ++I) {
-    if (getOperand(I) != Elt) return false;
-  }
+  for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+    if (getOperand(I) != Elt)
+      return false;
+  
   return true;
 }
 
@@ -1081,7 +1095,8 @@ Constant *ConstantVector::getSplatValue() const {
   Constant *Elt = getOperand(0);
   // Then make sure all remaining elements point to the same value.
   for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
-    if (getOperand(I) != Elt) return 0;
+    if (getOperand(I) != Elt)
+      return 0;
   return Elt;
 }
 
@@ -1095,7 +1110,7 @@ ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
 // destroyConstant - Remove the constant from the constant table...
 //
 void ConstantPointerNull::destroyConstant() {
-  getRawType()->getContext().pImpl->NullPtrConstants.remove(this);
+  getType()->getContext().pImpl->NullPtrConstants.remove(this);
   destroyConstantImpl();
 }
 
@@ -1110,7 +1125,7 @@ UndefValue *UndefValue::get(const Type *Ty) {
 // destroyConstant - Remove the constant from the constant table.
 //
 void UndefValue::destroyConstant() {
-  getRawType()->getContext().pImpl->UndefValueConstants.remove(this);
+  getType()->getContext().pImpl->UndefValueConstants.remove(this);
   destroyConstantImpl();
 }
 
@@ -1144,7 +1159,7 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
 // destroyConstant - Remove the constant from the constant table.
 //
 void BlockAddress::destroyConstant() {
-  getFunction()->getRawType()->getContext().pImpl
+  getFunction()->getType()->getContext().pImpl
     ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
   getBasicBlock()->AdjustBlockAddressRefCount(-1);
   destroyConstantImpl();
@@ -1183,7 +1198,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
   assert(NewBA != this && "I didn't contain From!");
   
   // Everyone using this now uses the replacement.
-  uncheckedReplaceAllUsesWith(NewBA);
+  replaceAllUsesWith(NewBA);
   
   destroyConstant();
 }
@@ -1420,49 +1435,15 @@ Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
   return getFoldedCast(Instruction::BitCast, C, DstTy);
 }
 
-Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
-                              Constant *C1, Constant *C2,
-                              unsigned Flags) {
-  // Check the operands for consistency first
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+                            unsigned Flags) {
+  // Check the operands for consistency first.
   assert(Opcode >= Instruction::BinaryOpsBegin &&
          Opcode <  Instruction::BinaryOpsEnd   &&
          "Invalid opcode in binary constant expression");
   assert(C1->getType() == C2->getType() &&
          "Operand types in binary constant expression should match");
-
-  if (ReqTy == C1->getType() || ReqTy == Type::getInt1Ty(ReqTy->getContext()))
-    if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
-      return FC;          // Fold a few common cases...
-
-  std::vector<Constant*> argVec(1, C1); argVec.push_back(C2);
-  ExprMapKeyType Key(Opcode, argVec, 0, Flags);
   
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
-  return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
-Constant *ConstantExpr::getCompareTy(unsigned short predicate,
-                                     Constant *C1, Constant *C2) {
-  switch (predicate) {
-    default: llvm_unreachable("Invalid CmpInst predicate");
-    case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
-    case CmpInst::FCMP_OGE:   case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
-    case CmpInst::FCMP_ONE:   case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
-    case CmpInst::FCMP_UEQ:   case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
-    case CmpInst::FCMP_ULT:   case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
-    case CmpInst::FCMP_TRUE:
-      return getFCmp(predicate, C1, C2);
-
-    case CmpInst::ICMP_EQ:  case CmpInst::ICMP_NE:  case CmpInst::ICMP_UGT:
-    case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
-    case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
-    case CmpInst::ICMP_SLE:
-      return getICmp(predicate, C1, C2);
-  }
-}
-
-Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
-                            unsigned Flags) {
 #ifndef NDEBUG
   switch (Opcode) {
   case Instruction::Add:
@@ -1521,7 +1502,15 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
   }
 #endif
 
-  return getTy(C1->getType(), Opcode, C1, C2, Flags);
+  if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+    return FC;          // Fold a few common cases.
+  
+  std::vector<Constant*> argVec(1, C1);
+  argVec.push_back(C2);
+  ExprMapKeyType Key(Opcode, argVec, 0, Flags);
+  
+  LLVMContextImpl *pImpl = C1->getContext().pImpl;
+  return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
 }
 
 Constant *ConstantExpr::getSizeOf(const Type* Ty) {
@@ -1537,8 +1526,8 @@ Constant *ConstantExpr::getSizeOf(const Type* Ty) {
 Constant *ConstantExpr::getAlignOf(const Type* Ty) {
   // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
   // Note that a non-inbounds gep is used, as null isn't within any object.
-  const Type *AligningTy = StructType::get(Ty->getContext(),
-                                   Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+  const Type *AligningTy = 
+    StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
   Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
   Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
   Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -1566,41 +1555,55 @@ Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
                      Type::getInt64Ty(Ty->getContext()));
 }
 
-Constant *ConstantExpr::getCompare(unsigned short pred, 
-                            Constant *C1, Constant *C2) {
+Constant *ConstantExpr::getCompare(unsigned short Predicate, 
+                                   Constant *C1, Constant *C2) {
   assert(C1->getType() == C2->getType() && "Op types should be identical!");
-  return getCompareTy(pred, C1, C2);
+  
+  switch (Predicate) {
+  default: llvm_unreachable("Invalid CmpInst predicate");
+  case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_OGE:   case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ONE:   case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+  case CmpInst::FCMP_UEQ:   case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+  case CmpInst::FCMP_ULT:   case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+  case CmpInst::FCMP_TRUE:
+    return getFCmp(Predicate, C1, C2);
+    
+  case CmpInst::ICMP_EQ:  case CmpInst::ICMP_NE:  case CmpInst::ICMP_UGT:
+  case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+  case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE:
+    return getICmp(Predicate, C1, C2);
+  }
 }
 
-Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
-                                    Constant *V1, Constant *V2) {
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
   assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
 
-  if (ReqTy == V1->getType())
-    if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
-      return SC;        // Fold common cases
+  if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+    return SC;        // Fold common cases
 
   std::vector<Constant*> argVec(3, C);
   argVec[1] = V1;
   argVec[2] = V2;
   ExprMapKeyType Key(Instruction::Select, argVec);
   
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
-  return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+  LLVMContextImpl *pImpl = C->getContext().pImpl;
+  return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
 }
 
-template<typename IndexTy>
-Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
-                                           IndexTy const *Idxs,
-                                           unsigned NumIdx, bool InBounds) {
-  assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
-                                           Idxs+NumIdx) ==
-         cast<PointerType>(ReqTy)->getElementType() &&
-         "GEP indices invalid!");
-
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+                                         unsigned NumIdx, bool InBounds) {
   if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
     return FC;          // Fold a few common cases.
 
+  // Get the result type of the getelementptr!
+  const Type *Ty = 
+    GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+  assert(Ty && "GEP indices invalid!");
+  unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
+  Type *ReqTy = Ty->getPointerTo(AS);
+  
   assert(C->getType()->isPointerTy() &&
          "Non-pointer type for constant GetElementPtr expression");
   // Look up the constant in the table first to ensure uniqueness
@@ -1611,32 +1614,11 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
     ArgVec.push_back(cast<Constant>(Idxs[i]));
   const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
                            InBounds ? GEPOperator::IsInBounds : 0);
-
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+  
+  LLVMContextImpl *pImpl = C->getContext().pImpl;
   return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
 }
 
-template<typename IndexTy>
-Constant *ConstantExpr::getGetElementPtrImpl(Constant *C, IndexTy const *Idxs,
-                                             unsigned NumIdx, bool InBounds) {
-  // Get the result type of the getelementptr!
-  const Type *Ty = 
-    GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
-  assert(Ty && "GEP indices invalid!");
-  unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
-  return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx,InBounds);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
-                                         unsigned NumIdx, bool InBounds) {
-  return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant *const *Idxs,
-                                         unsigned NumIdx, bool InBounds) {
-  return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
-}
-
 Constant *
 ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
   assert(LHS->getType() == RHS->getType());
@@ -1684,39 +1666,22 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
   return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
 }
 
-Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
-                                            Constant *Idx) {
-  if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
-    return FC;          // Fold a few common cases.
-  // Look up the constant in the table first to ensure uniqueness
-  std::vector<Constant*> ArgVec(1, Val);
-  ArgVec.push_back(Idx);
-  const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
-  
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
-  return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
 Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
   assert(Val->getType()->isVectorTy() &&
          "Tried to create extractelement operation on non-vector type!");
   assert(Idx->getType()->isIntegerTy(32) &&
          "Extractelement index must be i32 type!");
-  return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(),
-                             Val, Idx);
-}
-
-Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
-                                           Constant *Elt, Constant *Idx) {
-  if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+  
+  if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
     return FC;          // Fold a few common cases.
+  
   // Look up the constant in the table first to ensure uniqueness
   std::vector<Constant*> ArgVec(1, Val);
-  ArgVec.push_back(Elt);
   ArgVec.push_back(Idx);
-  const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+  const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
   
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+  LLVMContextImpl *pImpl = Val->getContext().pImpl;
+  Type *ReqTy = cast<VectorType>(Val->getType())->getElementType();
   return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
 }
 
@@ -1728,21 +1693,17 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
          && "Insertelement types must match!");
   assert(Idx->getType()->isIntegerTy(32) &&
          "Insertelement index must be i32 type!");
-  return getInsertElementTy(Val->getType(), Val, Elt, Idx);
-}
 
-Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
-                                           Constant *V2, Constant *Mask) {
-  if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
-    return FC;          // Fold a few common cases...
+  if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+    return FC;          // Fold a few common cases.
   // Look up the constant in the table first to ensure uniqueness
-  std::vector<Constant*> ArgVec(1, V1);
-  ArgVec.push_back(V2);
-  ArgVec.push_back(Mask);
-  const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+  std::vector<Constant*> ArgVec(1, Val);
+  ArgVec.push_back(Elt);
+  ArgVec.push_back(Idx);
+  const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
   
-  LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
-  return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+  LLVMContextImpl *pImpl = Val->getContext().pImpl;
+  return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
 }
 
 Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, 
@@ -1750,62 +1711,49 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
   assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
          "Invalid shuffle vector constant expr operands!");
 
+  if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+    return FC;          // Fold a few common cases.
+
   unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
   const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
   const Type *ShufTy = VectorType::get(EltTy, NElts);
-  return getShuffleVectorTy(ShufTy, V1, V2, Mask);
-}
 
-Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg,
-                                         Constant *Val,
-                                        const unsigned *Idxs, unsigned NumIdx) {
-  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs,
-                                          Idxs+NumIdx) == Val->getType() &&
-         "insertvalue indices invalid!");
-  assert(Agg->getType() == ReqTy &&
-         "insertvalue type invalid!");
-  assert(Agg->getType()->isFirstClassType() &&
-         "Non-first-class type for constant InsertValue expression");
-  Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx);
-  assert(FC && "InsertValue constant expr couldn't be folded!");
-  return FC;
+  // Look up the constant in the table first to ensure uniqueness
+  std::vector<Constant*> ArgVec(1, V1);
+  ArgVec.push_back(V2);
+  ArgVec.push_back(Mask);
+  const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+  
+  LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
+  return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
 }
 
 Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
-                                     const unsigned *IdxList, unsigned NumIdx) {
-  assert(Agg->getType()->isFirstClassType() &&
-         "Tried to create insertelement operation on non-first-class type!");
-
-  const Type *ReqTy = Agg->getType();
-#ifndef NDEBUG
-  const Type *ValTy =
-    ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx);
-#endif
-  assert(ValTy == Val->getType() && "insertvalue indices invalid!");
-  return getInsertValueTy(ReqTy, Agg, Val, IdxList, NumIdx);
-}
-
-Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg,
-                                        const unsigned *Idxs, unsigned NumIdx) {
-  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs,
-                                          Idxs+NumIdx) == ReqTy &&
-         "extractvalue indices invalid!");
+                                       ArrayRef<unsigned> Idxs) {
+  assert(ExtractValueInst::getIndexedType(Agg->getType(),
+                                          Idxs) == Val->getType() &&
+         "insertvalue indices invalid!");
   assert(Agg->getType()->isFirstClassType() &&
-         "Non-first-class type for constant extractvalue expression");
-  Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx);
-  assert(FC && "ExtractValue constant expr couldn't be folded!");
+         "Non-first-class type for constant insertvalue expression");
+  Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
+  assert(FC && "insertvalue constant expr couldn't be folded!");
   return FC;
 }
 
 Constant *ConstantExpr::getExtractValue(Constant *Agg,
-                                     const unsigned *IdxList, unsigned NumIdx) {
+                                        ArrayRef<unsigned> Idxs) {
   assert(Agg->getType()->isFirstClassType() &&
          "Tried to create extractelement operation on non-first-class type!");
 
-  const Type *ReqTy =
-    ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx);
+  const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+  (void)ReqTy;
   assert(ReqTy && "extractvalue indices invalid!");
-  return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
+  
+  assert(Agg->getType()->isFirstClassType() &&
+         "Non-first-class type for constant extractvalue expression");
+  Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
+  assert(FC && "ExtractValue constant expr couldn't be folded!");
+  return FC;
 }
 
 Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
@@ -1918,7 +1866,7 @@ Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
 // destroyConstant - Remove the constant from the constant table...
 //
 void ConstantExpr::destroyConstant() {
-  getRawType()->getContext().pImpl->ExprConstants.remove(this);
+  getType()->getContext().pImpl->ExprConstants.remove(this);
   destroyConstantImpl();
 }
 
@@ -1959,10 +1907,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
   assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
   Constant *ToC = cast<Constant>(To);
 
-  LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
+  LLVMContextImpl *pImpl = getType()->getContext().pImpl;
 
   std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
-  Lookup.first.first = cast<ArrayType>(getRawType());
+  Lookup.first.first = cast<ArrayType>(getType());
   Lookup.second = this;
 
   std::vector<Constant*> &Values = Lookup.first.second;
@@ -1996,7 +1944,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
   
   Constant *Replacement = 0;
   if (isAllZeros) {
-    Replacement = ConstantAggregateZero::get(getRawType());
+    Replacement = ConstantAggregateZero::get(getType());
   } else {
     // Check to see if we have this array type already.
     bool Exists;
@@ -2032,7 +1980,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
   assert(Replacement != this && "I didn't contain From!");
   
   // Everyone using this now uses the replacement.
-  uncheckedReplaceAllUsesWith(Replacement);
+  replaceAllUsesWith(Replacement);
   
   // Delete the old constant!
   destroyConstant();
@@ -2047,7 +1995,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
   assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
 
   std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
-  Lookup.first.first = cast<StructType>(getRawType());
+  Lookup.first.first = cast<StructType>(getType());
   Lookup.second = this;
   std::vector<Constant*> &Values = Lookup.first.second;
   Values.reserve(getNumOperands());  // Build replacement struct.
@@ -2069,11 +2017,11 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
   }
   Values[OperandToUpdate] = ToC;
   
-  LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
+  LLVMContextImpl *pImpl = getContext().pImpl;
   
   Constant *Replacement = 0;
   if (isAllZeros) {
-    Replacement = ConstantAggregateZero::get(getRawType());
+    Replacement = ConstantAggregateZero::get(getType());
   } else {
     // Check to see if we have this struct type already.
     bool Exists;
@@ -2098,7 +2046,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
   assert(Replacement != this && "I didn't contain From!");
   
   // Everyone using this now uses the replacement.
-  uncheckedReplaceAllUsesWith(Replacement);
+  replaceAllUsesWith(Replacement);
   
   // Delete the old constant!
   destroyConstant();
@@ -2116,11 +2064,11 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Values.push_back(Val);
   }
   
-  Constant *Replacement = get(cast<VectorType>(getRawType()), Values);
+  Constant *Replacement = get(Values);
   assert(Replacement != this && "I didn't contain From!");
   
   // Everyone using this now uses the replacement.
-  uncheckedReplaceAllUsesWith(Replacement);
+  replaceAllUsesWith(Replacement);
   
   // Delete the old constant!
   destroyConstant();
@@ -2151,8 +2099,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
     if (Agg == From) Agg = To;
     
     ArrayRef<unsigned> Indices = getIndices();
-    Replacement = ConstantExpr::getExtractValue(Agg,
-                                                &Indices[0], Indices.size());
+    Replacement = ConstantExpr::getExtractValue(Agg, Indices);
   } else if (getOpcode() == Instruction::InsertValue) {
     Constant *Agg = getOperand(0);
     Constant *Val = getOperand(1);
@@ -2160,11 +2107,10 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
     if (Val == From) Val = To;
     
     ArrayRef<unsigned> Indices = getIndices();
-    Replacement = ConstantExpr::getInsertValue(Agg, Val,
-                                               &Indices[0], Indices.size());
+    Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices);
   } else if (isCast()) {
     assert(getOperand(0) == From && "Cast only has one use!");
-    Replacement = ConstantExpr::getCast(getOpcode(), To, getRawType());
+    Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
   } else if (getOpcode() == Instruction::Select) {
     Constant *C1 = getOperand(0);
     Constant *C2 = getOperand(1);
@@ -2220,7 +2166,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
   assert(Replacement != this && "I didn't contain From!");
   
   // Everyone using this now uses the replacement.
-  uncheckedReplaceAllUsesWith(Replacement);
+  replaceAllUsesWith(Replacement);
   
   // Delete the old constant!
   destroyConstant();
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 13957545786d..bd134d9b892d 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -568,15 +568,13 @@ struct ConstantKeyData<InlineAsm> {
   }
 };
 
-template<class ValType, class TypeClass, class ConstantClass,
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
          bool HasLargeKey = false /*true for arrays and structs*/ >
-class ConstantUniqueMap : public AbstractTypeUser {
+class ConstantUniqueMap {
 public:
   typedef std::pair<const TypeClass*, ValType> MapKey;
   typedef std::map<MapKey, ConstantClass *> MapTy;
   typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
-  typedef std::map<const DerivedType*, typename MapTy::iterator>
-    AbstractTypeMapTy;
 private:
   /// Map - This is the main map from the element descriptor to the Constants.
   /// This is the primary way we avoid creating two of the same shape
@@ -589,10 +587,6 @@ private:
   /// through the map with very large keys.
   InverseMapTy InverseMap;
 
-  /// AbstractTypeMap - Map for abstract type constants.
-  ///
-  AbstractTypeMapTy AbstractTypeMap;
-    
 public:
   typename MapTy::iterator map_begin() { return Map.begin(); }
   typename MapTy::iterator map_end() { return Map.end(); }
@@ -629,7 +623,7 @@ private:
     }
       
     typename MapTy::iterator I =
-      Map.find(MapKey(static_cast<const TypeClass*>(CP->getRawType()),
+      Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()),
                       ConstantKeyData<ConstantClass>::getValType(CP)));
     if (I == Map.end() || I->second != CP) {
       // FIXME: This should not use a linear scan.  If this gets to be a
@@ -639,24 +633,8 @@ private:
     }
     return I;
   }
-    
-  void AddAbstractTypeUser(const Type *Ty, typename MapTy::iterator I) {
-    // If the type of the constant is abstract, make sure that an entry
-    // exists for it in the AbstractTypeMap.
-    if (Ty->isAbstract()) {
-      const DerivedType *DTy = static_cast<const DerivedType *>(Ty);
-      typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(DTy);
-
-      if (TI == AbstractTypeMap.end()) {
-        // Add ourselves to the ATU list of the type.
-        cast<DerivedType>(DTy)->addAbstractTypeUser(this);
-
-        AbstractTypeMap.insert(TI, std::make_pair(DTy, I));
-      }
-    }
-  }
 
-  ConstantClass* Create(const TypeClass *Ty, const ValType &V,
+  ConstantClass *Create(const TypeClass *Ty, ValRefType V,
                         typename MapTy::iterator I) {
     ConstantClass* Result =
       ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
@@ -667,15 +645,13 @@ private:
     if (HasLargeKey)  // Remember the reverse mapping if needed.
       InverseMap.insert(std::make_pair(Result, I));
 
-    AddAbstractTypeUser(Ty, I);
-      
     return Result;
   }
 public:
     
   /// getOrCreate - Return the specified constant from the map, creating it if
   /// necessary.
-  ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+  ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) {
     MapKey Lookup(Ty, V);
     ConstantClass* Result = 0;
     
@@ -692,43 +668,6 @@ public:
     return Result;
   }
 
-  void UpdateAbstractTypeMap(const DerivedType *Ty,
-                             typename MapTy::iterator I) {
-    assert(AbstractTypeMap.count(Ty) &&
-           "Abstract type not in AbstractTypeMap?");
-    typename MapTy::iterator &ATMEntryIt = AbstractTypeMap[Ty];
-    if (ATMEntryIt == I) {
-      // Yes, we are removing the representative entry for this type.
-      // See if there are any other entries of the same type.
-      typename MapTy::iterator TmpIt = ATMEntryIt;
-
-      // First check the entry before this one...
-      if (TmpIt != Map.begin()) {
-        --TmpIt;
-        if (TmpIt->first.first != Ty) // Not the same type, move back...
-          ++TmpIt;
-      }
-
-      // If we didn't find the same type, try to move forward...
-      if (TmpIt == ATMEntryIt) {
-        ++TmpIt;
-        if (TmpIt == Map.end() || TmpIt->first.first != Ty)
-          --TmpIt;   // No entry afterwards with the same type
-      }
-
-      // If there is another entry in the map of the same abstract type,
-      // update the AbstractTypeMap entry now.
-      if (TmpIt != ATMEntryIt) {
-        ATMEntryIt = TmpIt;
-      } else {
-        // Otherwise, we are removing the last instance of this type
-        // from the table.  Remove from the ATM, and from user list.
-        cast<DerivedType>(Ty)->removeAbstractTypeUser(this);
-        AbstractTypeMap.erase(Ty);
-      }
-    }
-  }
-
   void remove(ConstantClass *CP) {
     typename MapTy::iterator I = FindExistingElement(CP);
     assert(I != Map.end() && "Constant not found in constant table!");
@@ -736,12 +675,6 @@ public:
 
     if (HasLargeKey)  // Remember the reverse mapping if needed.
       InverseMap.erase(CP);
-      
-    // Now that we found the entry, make sure this isn't the entry that
-    // the AbstractTypeMap points to.
-    const TypeClass *Ty = I->first.first;
-    if (Ty->isAbstract())
-      UpdateAbstractTypeMap(static_cast<const DerivedType *>(Ty), I);
 
     Map.erase(I);
   }
@@ -755,22 +688,7 @@ public:
     assert(OldI != Map.end() && "Constant not found in constant table!");
     assert(OldI->second == C && "Didn't find correct element?");
       
-    // If this constant is the representative element for its abstract type,
-    // update the AbstractTypeMap so that the representative element is I.
-    //
-    // This must use getRawType() because if the type is under refinement, we
-    // will get the refineAbstractType callback below, and we don't want to
-    // kick union find in on the constant.
-    if (C->getRawType()->isAbstract()) {
-      typename AbstractTypeMapTy::iterator ATI =
-          AbstractTypeMap.find(cast<DerivedType>(C->getRawType()));
-      assert(ATI != AbstractTypeMap.end() &&
-             "Abstract type not in AbstractTypeMap?");
-      if (ATI->second == OldI)
-        ATI->second = I;
-    }
-      
-    // Remove the old entry from the map.
+     // Remove the old entry from the map.
     Map.erase(OldI);
     
     // Update the inverse map so that we know that this constant is now
@@ -780,58 +698,6 @@ public:
       InverseMap[C] = I;
     }
   }
-    
-  void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
-    typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(OldTy);
-
-    assert(I != AbstractTypeMap.end() &&
-           "Abstract type not in AbstractTypeMap?");
-
-    // Convert a constant at a time until the last one is gone.  The last one
-    // leaving will remove() itself, causing the AbstractTypeMapEntry to be
-    // eliminated eventually.
-    do {
-      ConstantClass *C = I->second->second;
-      MapKey Key(cast<TypeClass>(NewTy),
-                 ConstantKeyData<ConstantClass>::getValType(C));
-
-      std::pair<typename MapTy::iterator, bool> IP =
-        Map.insert(std::make_pair(Key, C));
-      if (IP.second) {
-        // The map didn't previously have an appropriate constant in the
-        // new type.
-        
-        // Remove the old entry.
-        typename MapTy::iterator OldI =
-          Map.find(MapKey(cast<TypeClass>(OldTy), IP.first->first.second));
-        assert(OldI != Map.end() && "Constant not in map!");
-        UpdateAbstractTypeMap(OldTy, OldI);
-        Map.erase(OldI);
-
-        // Set the constant's type. This is done in place!
-        setType(C, NewTy);
-
-        // Update the inverse map so that we know that this constant is now
-        // located at descriptor I.
-        if (HasLargeKey)
-          InverseMap[C] = IP.first;
-
-        AddAbstractTypeUser(NewTy, IP.first);
-      } else {
-        // The map already had an appropriate constant in the new type, so
-        // there's no longer a need for the old constant.
-        C->uncheckedReplaceAllUsesWith(IP.first->second);
-        C->destroyConstant();    // This constant is now dead, destroy it.
-      }
-      I = AbstractTypeMap.find(OldTy);
-    } while (I != AbstractTypeMap.end());
-  }
-
-  // If the type became concrete without being refined to any other existing
-  // type, we just remove ourselves from the ATU list.
-  void typeBecameConcrete(const DerivedType *AbsTy) {
-    AbsTy->removeAbstractTypeUser(this);
-  }
 
   void dump() const {
     DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 92f944027a7c..2a816e123a61 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -19,7 +19,6 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/GlobalAlias.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/PassManager.h"
@@ -111,27 +110,6 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
   unwrap(M)->setTargetTriple(Triple);
 }
 
-/*--.. Type names ..........................................................--*/
-LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) {
-  return unwrap(M)->addTypeName(Name, unwrap(Ty));
-}
-
-void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name) {
-  TypeSymbolTable &TST = unwrap(M)->getTypeSymbolTable();
-
-  TypeSymbolTable::iterator I = TST.find(Name);
-  if (I != TST.end())
-    TST.remove(I);
-}
-
-LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
-  return wrap(unwrap(M)->getTypeByName(Name));
-}
-
-const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty) {
-  return unwrap(M)->getTypeName(unwrap(Ty)).c_str();
-}
-
 void LLVMDumpModule(LLVMModuleRef M) {
   unwrap(M)->dump();
 }
@@ -182,8 +160,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
     return LLVMArrayTypeKind;
   case Type::PointerTyID:
     return LLVMPointerTypeKind;
-  case Type::OpaqueTyID:
-    return LLVMOpaqueTypeKind;
   case Type::VectorTyID:
     return LLVMVectorTypeKind;
   case Type::X86_MMXTyID:
@@ -284,10 +260,7 @@ LLVMTypeRef LLVMX86MMXType(void) {
 LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
                              LLVMTypeRef *ParamTypes, unsigned ParamCount,
                              LLVMBool IsVarArg) {
-  std::vector<const Type*> Tys;
-  for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I)
-    Tys.push_back(unwrap(*I));
-  
+  ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
   return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
 }
 
@@ -314,11 +287,7 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
 
 LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
                            unsigned ElementCount, LLVMBool Packed) {
-  std::vector<const Type*> Tys;
-  for (LLVMTypeRef *I = ElementTypes,
-                   *E = ElementTypes + ElementCount; I != E; ++I)
-    Tys.push_back(unwrap(*I));
-  
+  ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
   return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
 }
 
@@ -328,6 +297,16 @@ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
                                  ElementCount, Packed);
 }
 
+/* C binding: forwards to StructType::createNamed in the given context. */
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) {
+  return wrap(StructType::createNamed(*unwrap(C), Name));
+}
+
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+                       unsigned ElementCount, LLVMBool Packed) {
+  ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+  unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
+}
 
 unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
   return unwrap<StructType>(StructTy)->getNumElements();
@@ -344,6 +323,14 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
   return unwrap<StructType>(StructTy)->isPacked();
 }
 
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
+  return unwrap<StructType>(StructTy)->isOpaque();
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+  return wrap(unwrap(M)->getTypeByName(Name));
+}
+
 /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
 
 LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
@@ -382,9 +369,6 @@ LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C)  {
 LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
   return wrap(Type::getLabelTy(*unwrap(C)));
 }
-LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C) {
-  return wrap(OpaqueType::get(*unwrap(C)));
-}
 
 LLVMTypeRef LLVMVoidType(void)  {
   return LLVMVoidTypeInContext(LLVMGetGlobalContext());
@@ -392,28 +376,6 @@ LLVMTypeRef LLVMVoidType(void)  {
 LLVMTypeRef LLVMLabelType(void) {
   return LLVMLabelTypeInContext(LLVMGetGlobalContext());
 }
-LLVMTypeRef LLVMOpaqueType(void) {
-  return LLVMOpaqueTypeInContext(LLVMGetGlobalContext());
-}
-
-/*--.. Operations on type handles ..........................................--*/
-
-LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy) {
-  return wrap(new PATypeHolder(unwrap(PotentiallyAbstractTy)));
-}
-
-void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle) {
-  delete unwrap(TypeHandle);
-}
-
-LLVMTypeRef LLVMResolveTypeHandle(LLVMTypeHandleRef TypeHandle) {
-  return wrap(unwrap(TypeHandle)->get());
-}
-
-void LLVMRefineType(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy) {
-  unwrap<DerivedType>(AbstractTy)->refineAbstractTypeTo(unwrap(ConcreteTy));
-}
-
 
 /*===-- Operations on values ----------------------------------------------===*/
 
@@ -612,9 +574,10 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
 LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, 
                                       LLVMValueRef *ConstantVals,
                                       unsigned Count, LLVMBool Packed) {
-  return wrap(ConstantStruct::get(*unwrap(C),
-                                  unwrap<Constant>(ConstantVals, Count),
-                                  Count, Packed != 0));
+  Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+  return wrap(ConstantStruct::getAnon(*unwrap(C),
+                                      ArrayRef<Constant*>(Elements, Count),
+                                      Packed != 0));
 }
 
 LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
@@ -624,15 +587,24 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
 }
 LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
                             LLVMValueRef *ConstantVals, unsigned Length) {
-  return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length),
-                                 unwrap<Constant>(ConstantVals, Length),
-                                 Length));
+  ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+  return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
 }
 LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
                              LLVMBool Packed) {
   return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
                                   Packed);
 }
+
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+                                  LLVMValueRef *ConstantVals,
+                                  unsigned Count) {
+  // View the Count unwrapped constants as the field list of the struct.
+  ArrayRef<Constant*> Fields(unwrap<Constant>(ConstantVals, Count), Count);
+  const StructType *NamedTy = cast<StructType>(unwrap(StructTy));
+  return wrap(ConstantStruct::get(NamedTy, Fields));
+}
+
 LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
   return wrap(ConstantVector::get(ArrayRef<Constant*>(
                             unwrap<Constant>(ScalarConstantVals, Size), Size)));
@@ -962,7 +934,8 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
 LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
                                    unsigned NumIdx) {
   return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
-                                            IdxList, NumIdx));
+                                            ArrayRef<unsigned>(IdxList,
+                                                               NumIdx)));
 }
 
 LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
@@ -970,7 +943,8 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
                                   unsigned *IdxList, unsigned NumIdx) {
   return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
                                          unwrap<Constant>(ElementValueConstant),
-                                           IdxList, NumIdx));
+                                           ArrayRef<unsigned>(IdxList,
+                                                              NumIdx)));
 }
 
 LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
@@ -1706,7 +1680,7 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
                              LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
                              const char *Name) {
   return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
-                                      unwrap(Args), unwrap(Args) + NumArgs,
+                                      ArrayRef<Value *>(unwrap(Args), NumArgs),
                                       Name));
 }
 
@@ -2089,8 +2063,9 @@ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
 LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
                            LLVMValueRef *Args, unsigned NumArgs,
                            const char *Name) {
-  return wrap(unwrap(B)->CreateCall(unwrap(Fn), unwrap(Args),
-                                    unwrap(Args) + NumArgs, Name));
+  return wrap(unwrap(B)->CreateCall(unwrap(Fn),
+                                    ArrayRef<Value *>(unwrap(Args), NumArgs),
+                                    Name));
 }
 
 LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index 520333cbbcf2..4ff6b2cd80e8 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -128,6 +128,38 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
   return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3)));
 }
 
+/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
+  // Operands 1-3 (scope, line, column) are read below; require at least 4.
+  if (N == 0 || N->getNumOperands() < 4) return DebugLoc();
+
+  MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(1));
+  if (Scope == 0) return DebugLoc();
+
+  unsigned LineNo = 0, ColNo = 0;
+  if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(2)))
+    LineNo = Line->getZExtValue();
+  if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(3)))
+    ColNo = Col->getZExtValue();
+  return get(LineNo, ColNo, Scope, NULL);
+}
+
+void DebugLoc::dump(const LLVMContext &Ctx) const {
+#ifndef NDEBUG // Prints to dbgs(); the body is empty when NDEBUG is defined.
+  if (!isUnknown()) { // Unknown locations print nothing.
+    dbgs() << getLine();
+    if (getCol() != 0) // The column is omitted when it is zero.
+      dbgs() << ',' << getCol();
+    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+    if (!InlinedAtDL.isUnknown()) {
+      dbgs() << " @ ";
+      InlinedAtDL.dump(Ctx); // Recurse to print the inlined-at location.
+    } else
+      dbgs() << "\n"; // No (further) inlined-at location: end the line.
+  }
+#endif
+}
+
 //===----------------------------------------------------------------------===//
 // DenseMap specialization
 //===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 0ae0bdb8056a..6536bcd0e2ed 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -134,7 +134,7 @@ LLVMContext &Function::getContext() const {
   return getType()->getContext();
 }
 
-const FunctionType *Function::getFunctionType() const {
+FunctionType *Function::getFunctionType() const {
   return cast<FunctionType>(getType()->getElementType());
 }
 
@@ -142,7 +142,7 @@ bool Function::isVarArg() const {
   return getFunctionType()->isVarArg();
 }
 
-const Type *Function::getReturnType() const {
+Type *Function::getReturnType() const {
   return getFunctionType()->getReturnType();
 }
 
@@ -163,7 +163,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
   : GlobalValue(PointerType::getUnqual(Ty), 
                 Value::FunctionVal, 0, 0, Linkage, name) {
   assert(FunctionType::isValidReturnType(getReturnType()) &&
-         !getReturnType()->isOpaqueTy() && "invalid return type");
+         "invalid return type");
   SymTab = new ValueSymbolTable();
 
   // If the function has arguments, mark them as lazily built.
@@ -333,7 +333,7 @@ unsigned Function::getIntrinsicID() const {
   return 0;
 }
 
-std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { 
+std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
   assert(id < num_intrinsics && "Invalid intrinsic ID!");
   static const char * const Table[] = {
     "not_intrinsic",
@@ -341,10 +341,10 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
 #include "llvm/Intrinsics.gen"
 #undef GET_INTRINSIC_NAME_TABLE
   };
-  if (numTys == 0)
+  if (Tys.empty())
     return Table[id];
   std::string Result(Table[id]);
-  for (unsigned i = 0; i < numTys; ++i) {
+  for (unsigned i = 0; i < Tys.size(); ++i) {
     if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
       Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) + 
                 EVT::getEVT(PTyp->getElementType()).getEVTString();
@@ -356,10 +356,9 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
 }
 
 const FunctionType *Intrinsic::getType(LLVMContext &Context,
-                                       ID id, const Type **Tys, 
-                                       unsigned numTys) {
+                                       ID id, ArrayRef<Type*> Tys) {
   const Type *ResultTy = NULL;
-  std::vector<const Type*> ArgTys;
+  std::vector<Type*> ArgTys;
   bool IsVarArg = false;
   
 #define GET_INTRINSIC_GENERATOR
@@ -384,14 +383,12 @@ bool Intrinsic::isOverloaded(ID id) {
 #include "llvm/Intrinsics.gen"
 #undef GET_INTRINSIC_ATTRIBUTES
 
-Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys, 
-                                    unsigned numTys) {
+Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
   // There can never be multiple globals with the same name of different types,
   // because intrinsics must be a specific type.
   return
-    cast<Function>(M->getOrInsertFunction(getName(id, Tys, numTys),
-                                          getType(M->getContext(),
-                                                  id, Tys, numTys)));
+    cast<Function>(M->getOrInsertFunction(getName(id, Tys),
+                                          getType(M->getContext(), id, Tys)));
 }
 
 // This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
@@ -417,7 +414,7 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
 /// setjmp or other function that gcc recognizes as "returning twice".
 ///
 /// FIXME: Remove after <rdar://problem/8031714> is fixed.
-/// FIXME: Is the obove FIXME valid?
+/// FIXME: Is the above FIXME valid?
 bool Function::callsFunctionThatReturnsTwice() const {
   const Module *M = this->getParent();
   static const char *ReturnsTwiceFns[] = {
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 60000ad1b50e..db008e09d1c8 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -51,6 +51,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
   setAlignment(Src->getAlignment());
   setSection(Src->getSection());
   setVisibility(Src->getVisibility());
+  setUnnamedAddr(Src->hasUnnamedAddr());
 }
 
 void GlobalValue::setAlignment(unsigned Align) {
@@ -60,6 +61,20 @@ void GlobalValue::setAlignment(unsigned Align) {
   Alignment = Log2_32(Align) + 1;
   assert(getAlignment() == Align && "Alignment representation error!");
 }
+
+bool GlobalValue::isDeclaration() const {
+  // Global variables are definitions if they have an initializer operand.
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+    return GV->getNumOperands() == 0;
+
+  // Functions are definitions if they have a body (are not empty).
+  if (const Function *F = dyn_cast<Function>(this))
+    return F->empty();
+
+  // Anything else must be an alias, and aliases are always definitions.
+  assert(isa<GlobalAlias>(this) && "Unknown GlobalValue subclass!");
+  return false;
+}
   
 //===----------------------------------------------------------------------===//
 // GlobalVariable Implementation
@@ -201,39 +216,26 @@ void GlobalAlias::eraseFromParent() {
   getParent()->getAliasList().erase(this);
 }
 
-bool GlobalAlias::isDeclaration() const {
-  const GlobalValue* AV = getAliasedGlobal();
-  if (AV)
-    return AV->isDeclaration();
-  else
-    return false;
-}
-
-void GlobalAlias::setAliasee(Constant *Aliasee) 
-{
-  if (Aliasee)
-    assert(Aliasee->getType() == getType() &&
-           "Alias and aliasee types should match!");
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+  assert((!Aliasee || Aliasee->getType() == getType()) &&
+         "Alias and aliasee types should match!");
   
   setOperand(0, Aliasee);
 }
 
 const GlobalValue *GlobalAlias::getAliasedGlobal() const {
   const Constant *C = getAliasee();
-  if (C) {
-    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-      return GV;
-    else {
-      const ConstantExpr *CE = 0;
-      if ((CE = dyn_cast<ConstantExpr>(C)) &&
-          (CE->getOpcode() == Instruction::BitCast || 
-           CE->getOpcode() == Instruction::GetElementPtr))
-        return dyn_cast<GlobalValue>(CE->getOperand(0));
-      else
-        llvm_unreachable("Unsupported aliasee");
-    }
-  }
-  return 0;
+  if (C == 0) return 0;
+  
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return GV;
+
+  const ConstantExpr *CE = cast<ConstantExpr>(C);
+  assert((CE->getOpcode() == Instruction::BitCast || 
+          CE->getOpcode() == Instruction::GetElementPtr) &&
+         "Unsupported aliasee");
+  
+  return dyn_cast<GlobalValue>(CE->getOperand(0));
 }
 
 const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
@@ -254,7 +256,7 @@ const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
     GV = GA->getAliasedGlobal();
 
     if (!Visited.insert(GV))
-      return NULL;
+      return 0;
   }
 
   return GV;
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index f2d469a2d84f..ffe961fee7c2 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -34,7 +34,7 @@ Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
   return GV;
 }
 
-const Type *IRBuilderBase::getCurrentFunctionReturnType() const {
+Type *IRBuilderBase::getCurrentFunctionReturnType() const {
   assert(BB && BB->getParent() && "No current function!");
   return BB->getParent()->getReturnType();
 }
@@ -52,9 +52,9 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
   return BCI;
 }
 
-static CallInst *createCallHelper(Value *Callee, Value *const* Ops,
-                                  unsigned NumOps, IRBuilderBase *Builder) {
-  CallInst *CI = CallInst::Create(Callee, Ops, Ops + NumOps, "");
+static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+                                  IRBuilderBase *Builder) {
+  CallInst *CI = CallInst::Create(Callee, Ops, "");
   Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
   Builder->SetInstDebugLocation(CI);
   return CI;  
@@ -65,11 +65,11 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
              bool isVolatile, MDNode *TBAATag) {
   Ptr = getCastedInt8PtrValue(Ptr);
   Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
-  const Type *Tys[] = { Ptr->getType(), Size->getType() };
+  Type *Tys[] = { Ptr->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
   
-  CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+  CallInst *CI = createCallHelper(TheFn, Ops, this);
   
   // Set the TBAA info if present.
   if (TBAATag)
@@ -85,11 +85,11 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
   Src = getCastedInt8PtrValue(Src);
 
   Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
-  const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+  Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3);
+  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
   
-  CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+  CallInst *CI = createCallHelper(TheFn, Ops, this);
   
   // Set the TBAA info if present.
   if (TBAATag)
@@ -105,11 +105,11 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
   Src = getCastedInt8PtrValue(Src);
   
   Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
-  const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+  Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys, 3);
+  Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
   
-  CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+  CallInst *CI = createCallHelper(TheFn, Ops, this);
   
   // Set the TBAA info if present.
   if (TBAATag)
@@ -130,7 +130,7 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
   Value *Ops[] = { Size, Ptr };
   Module *M = BB->getParent()->getParent();
   Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
-  return createCallHelper(TheFn, Ops, 2, this);
+  return createCallHelper(TheFn, Ops, this);
 }
 
 CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
@@ -145,5 +145,5 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
   Value *Ops[] = { Size, Ptr };
   Module *M = BB->getParent()->getParent();
   Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
-  return createCallHelper(TheFn, Ops, 2, this);
+  return createCallHelper(TheFn, Ops, this);
 }
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index bd3667db7614..4a03b395e98e 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -47,11 +47,11 @@ InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
 }
 
 void InlineAsm::destroyConstant() {
-  getRawType()->getContext().pImpl->InlineAsms.remove(this);
+  getType()->getContext().pImpl->InlineAsms.remove(this);
   delete this;
 }
 
-const FunctionType *InlineAsm::getFunctionType() const {
+FunctionType *InlineAsm::getFunctionType() const {
   return cast<FunctionType>(getType()->getElementType());
 }
     
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 2c8b8b23b18e..02c075743959 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -204,22 +204,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
   if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
     return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
            CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
-  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
-    if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
-      return false;
-    for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
-      if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i])
-        return false;
-    return true;
-  }
-  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) {
-    if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices())
-      return false;
-    for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
-      if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i])
-        return false;
-    return true;
-  }
+  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+    return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+    return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
 
   return true;
 }
@@ -256,22 +244,10 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
     return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
            CI->getAttributes() ==
              cast<InvokeInst>(I)->getAttributes();
-  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
-    if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
-      return false;
-    for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
-      if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i])
-        return false;
-    return true;
-  }
-  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) {
-    if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices())
-      return false;
-    for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
-      if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i])
-        return false;
-    return true;
-  }
+  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+    return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+    return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
 
   return true;
 }
@@ -429,8 +405,10 @@ Instruction *Instruction::clone() const {
   // Otherwise, enumerate and copy over metadata from the old instruction to the
   // new one.
   SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
-  getAllMetadata(TheMDs);
+  getAllMetadataOtherThanDebugLoc(TheMDs);
   for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
     New->setMetadata(TheMDs[i].first, TheMDs[i].second);
+  
+  New->setDebugLoc(getDebugLoc());
   return New;
 }
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 8f4eabeb8aee..9baad09cb272 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -87,11 +87,8 @@ PHINode::PHINode(const PHINode &PN)
   : Instruction(PN.getType(), Instruction::PHI,
                 allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
     ReservedSpace(PN.getNumOperands()) {
-  Use *OL = OperandList;
-  for (unsigned i = 0, e = PN.getNumOperands(); i != e; i+=2) {
-    OL[i] = PN.getOperand(i);
-    OL[i+1] = PN.getOperand(i+1);
-  }
+  std::copy(PN.op_begin(), PN.op_end(), op_begin());
+  std::copy(PN.block_begin(), PN.block_end(), block_begin());
   SubclassOptionalData = PN.SubclassOptionalData;
 }
 
@@ -99,31 +96,37 @@ PHINode::~PHINode() {
   dropHungoffUses();
 }
 
+Use *PHINode::allocHungoffUses(unsigned N) const {
+  // Allocate one raw buffer laid out as: N Uses (the incoming values), then a
+  // Use::UserRef pointing back at this PHINode (with bottom bit set), then N
+  // BasicBlock* slots for the incoming blocks.  The slots are not set here.
+  size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+    + N * sizeof(BasicBlock*);
+  Use *Begin = static_cast<Use*>(::operator new(size));
+  Use *End = Begin + N; // One past the last Use; the UserRef is built here.
+  (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+  return Use::initTags(Begin, End);
+}
+
 // removeIncomingValue - Remove an incoming value.  This is useful if a
 // predecessor basic block is deleted.
 Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
-  unsigned NumOps = getNumOperands();
-  Use *OL = OperandList;
-  assert(Idx*2 < NumOps && "BB not in PHI node!");
-  Value *Removed = OL[Idx*2];
+  Value *Removed = getIncomingValue(Idx);
 
   // Move everything after this operand down.
   //
   // FIXME: we could just swap with the end of the list, then erase.  However,
-  // client might not expect this to happen.  The code as it is thrashes the
+  // clients might not expect this to happen.  The code as it is thrashes the
   // use/def lists, which is kinda lame.
-  for (unsigned i = (Idx+1)*2; i != NumOps; i += 2) {
-    OL[i-2] = OL[i];
-    OL[i-2+1] = OL[i+1];
-  }
+  std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+  std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
 
   // Nuke the last value.
-  OL[NumOps-2].set(0);
-  OL[NumOps-2+1].set(0);
-  NumOperands = NumOps-2;
+  Op<-1>().set(0);
+  --NumOperands;
 
   // If the PHI node is dead, because it has zero entries, nuke it now.
-  if (NumOps == 2 && DeletePHIIfEmpty) {
+  if (getNumOperands() == 0 && DeletePHIIfEmpty) {
     // If anyone is using this PHI, make them use a dummy value instead...
     replaceAllUsesWith(UndefValue::get(getType()));
     eraseFromParent();
@@ -137,15 +140,18 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
 ///
 void PHINode::growOperands() {
   unsigned e = getNumOperands();
-  // Multiply by 1.5 and round down so the result is still even.
-  unsigned NumOps = e + e / 4 * 2;
-  if (NumOps < 4) NumOps = 4;      // 4 op PHI nodes are VERY common.
+  unsigned NumOps = e + e / 2;
+  if (NumOps < 2) NumOps = 2;      // 2 op PHI nodes are VERY common.
+
+  Use *OldOps = op_begin();
+  BasicBlock **OldBlocks = block_begin();
 
   ReservedSpace = NumOps;
-  Use *OldOps = OperandList;
-  Use *NewOps = allocHungoffUses(NumOps);
-  std::copy(OldOps, OldOps + e, NewOps);
-  OperandList = NewOps;
+  OperandList = allocHungoffUses(ReservedSpace);
+
+  std::copy(OldOps, OldOps + e, op_begin());
+  std::copy(OldBlocks, OldBlocks + e, block_begin());
+
   Use::zap(OldOps, OldOps + e, true);
 }
 
@@ -168,95 +174,42 @@ Value *PHINode::hasConstantValue() const {
 CallInst::~CallInst() {
 }
 
-void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
-  assert(NumOperands == NumParams+1 && "NumOperands not set up?");
+void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+  assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
   Op<-1>() = Func;
 
+#ifndef NDEBUG
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  (void)FTy;  // silence warning.
 
-  assert((NumParams == FTy->getNumParams() ||
-          (FTy->isVarArg() && NumParams > FTy->getNumParams())) &&
+  assert((Args.size() == FTy->getNumParams() ||
+          (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
          "Calling a function with bad signature!");
-  for (unsigned i = 0; i != NumParams; ++i) {
+
+  for (unsigned i = 0; i != Args.size(); ++i)
     assert((i >= FTy->getNumParams() || 
-            FTy->getParamType(i) == Params[i]->getType()) &&
+            FTy->getParamType(i) == Args[i]->getType()) &&
            "Calling a function with a bad signature!");
-    OperandList[i] = Params[i];
-  }
-}
-
-void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
-  assert(NumOperands == 3 && "NumOperands not set up?");
-  Op<-1>() = Func;
-  Op<0>() = Actual1;
-  Op<1>() = Actual2;
-
-  const FunctionType *FTy =
-    cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  (void)FTy;  // silence warning.
-
-  assert((FTy->getNumParams() == 2 ||
-          (FTy->isVarArg() && FTy->getNumParams() < 2)) &&
-         "Calling a function with bad signature");
-  assert((0 >= FTy->getNumParams() || 
-          FTy->getParamType(0) == Actual1->getType()) &&
-         "Calling a function with a bad signature!");
-  assert((1 >= FTy->getNumParams() || 
-          FTy->getParamType(1) == Actual2->getType()) &&
-         "Calling a function with a bad signature!");
-}
-
-void CallInst::init(Value *Func, Value *Actual) {
-  assert(NumOperands == 2 && "NumOperands not set up?");
-  Op<-1>() = Func;
-  Op<0>() = Actual;
-
-  const FunctionType *FTy =
-    cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  (void)FTy;  // silence warning.
+#endif
 
-  assert((FTy->getNumParams() == 1 ||
-          (FTy->isVarArg() && FTy->getNumParams() == 0)) &&
-         "Calling a function with bad signature");
-  assert((0 == FTy->getNumParams() || 
-          FTy->getParamType(0) == Actual->getType()) &&
-         "Calling a function with a bad signature!");
+  std::copy(Args.begin(), Args.end(), op_begin());
+  setName(NameStr);
 }
 
-void CallInst::init(Value *Func) {
+void CallInst::init(Value *Func, const Twine &NameStr) {
   assert(NumOperands == 1 && "NumOperands not set up?");
   Op<-1>() = Func;
 
+#ifndef NDEBUG
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
-  (void)FTy;  // silence warning.
 
   assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
-}
+#endif
 
-CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name,
-                   Instruction *InsertBefore)
-  : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
-                                   ->getElementType())->getReturnType(),
-                Instruction::Call,
-                OperandTraits<CallInst>::op_end(this) - 2,
-                2, InsertBefore) {
-  init(Func, Actual);
-  setName(Name);
+  setName(NameStr);
 }
 
-CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name,
-                   BasicBlock  *InsertAtEnd)
-  : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
-                                   ->getElementType())->getReturnType(),
-                Instruction::Call,
-                OperandTraits<CallInst>::op_end(this) - 2,
-                2, InsertAtEnd) {
-  init(Func, Actual);
-  setName(Name);
-}
 CallInst::CallInst(Value *Func, const Twine &Name,
                    Instruction *InsertBefore)
   : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
@@ -264,8 +217,7 @@ CallInst::CallInst(Value *Func, const Twine &Name,
                 Instruction::Call,
                 OperandTraits<CallInst>::op_end(this) - 1,
                 1, InsertBefore) {
-  init(Func);
-  setName(Name);
+  init(Func, Name);
 }
 
 CallInst::CallInst(Value *Func, const Twine &Name,
@@ -275,8 +227,7 @@ CallInst::CallInst(Value *Func, const Twine &Name,
                 Instruction::Call,
                 OperandTraits<CallInst>::op_end(this) - 1,
                 1, InsertAtEnd) {
-  init(Func);
-  setName(Name);
+  init(Func, Name);
 }
 
 CallInst::CallInst(const CallInst &CI)
@@ -287,10 +238,7 @@ CallInst::CallInst(const CallInst &CI)
   setTailCall(CI.isTailCall());
   setCallingConv(CI.getCallingConv());
     
-  Use *OL = OperandList;
-  Use *InOL = CI.OperandList;
-  for (unsigned i = 0, e = CI.getNumOperands(); i != e; ++i)
-    OL[i] = InOL[i];
+  std::copy(CI.op_begin(), CI.op_end(), op_begin());
   SubclassOptionalData = CI.SubclassOptionalData;
 }
 
@@ -366,7 +314,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
   // Create the call to Malloc.
   BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
   Module* M = BB->getParent()->getParent();
-  const Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+  Type *BPTy = Type::getInt8PtrTy(BB->getContext());
   Value *MallocFunc = MallocF;
   if (!MallocFunc)
     // prototype malloc as "void *malloc(size_t)"
@@ -481,27 +429,28 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
 //===----------------------------------------------------------------------===//
 
 void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
-                      Value* const *Args, unsigned NumArgs) {
-  assert(NumOperands == 3+NumArgs && "NumOperands not set up?");
+                      ArrayRef<Value *> Args, const Twine &NameStr) {
+  assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
   Op<-3>() = Fn;
   Op<-2>() = IfNormal;
   Op<-1>() = IfException;
+
+#ifndef NDEBUG
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
-  (void)FTy;  // silence warning.
 
-  assert(((NumArgs == FTy->getNumParams()) ||
-          (FTy->isVarArg() && NumArgs > FTy->getNumParams())) &&
+  assert(((Args.size() == FTy->getNumParams()) ||
+          (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
          "Invoking a function with bad signature");
 
-  Use *OL = OperandList;
-  for (unsigned i = 0, e = NumArgs; i != e; i++) {
+  for (unsigned i = 0, e = Args.size(); i != e; i++)
     assert((i >= FTy->getNumParams() || 
             FTy->getParamType(i) == Args[i]->getType()) &&
            "Invoking a function with a bad signature!");
-    
-    OL[i] = Args[i];
-  }
+#endif
+
+  std::copy(Args.begin(), Args.end(), op_begin());
+  setName(NameStr);
 }
 
 InvokeInst::InvokeInst(const InvokeInst &II)
@@ -511,9 +460,7 @@ InvokeInst::InvokeInst(const InvokeInst &II)
                    II.getNumOperands()) {
   setAttributes(II.getAttributes());
   setCallingConv(II.getCallingConv());
-  Use *OL = OperandList, *InOL = II.OperandList;
-  for (unsigned i = 0, e = II.getNumOperands(); i != e; ++i)
-    OL[i] = InOL[i];
+  std::copy(II.op_begin(), II.op_end(), op_begin());
   SubclassOptionalData = II.SubclassOptionalData;
 }
 
@@ -817,7 +764,7 @@ bool AllocaInst::isArrayAllocation() const {
   return true;
 }
 
-const Type *AllocaInst::getAllocatedType() const {
+Type *AllocaInst::getAllocatedType() const {
   return getType()->getElementType();
 }
 
@@ -1092,7 +1039,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
 GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
                                      const Twine &Name, Instruction *InBe)
   : Instruction(PointerType::get(
-      checkType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
+      checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
                 GetElementPtr,
                 OperandTraits<GetElementPtrInst>::op_end(this) - 2,
                 2, InBe) {
@@ -1102,7 +1049,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
 GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
                                      const Twine &Name, BasicBlock *IAE)
   : Instruction(PointerType::get(
-            checkType(getIndexedType(Ptr->getType(),Idx)),  
+            checkGEPType(getIndexedType(Ptr->getType(),Idx)),  
                 retrieveAddrSpace(Ptr)),
                 GetElementPtr,
                 OperandTraits<GetElementPtrInst>::op_end(this) - 2,
@@ -1120,60 +1067,50 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
 /// pointer type.
 ///
 template <typename IndexTy>
-static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
-                                          unsigned NumIdx) {
+static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
+                                    unsigned NumIdx) {
   const PointerType *PTy = dyn_cast<PointerType>(Ptr);
   if (!PTy) return 0;   // Type isn't a pointer type!
-  const Type *Agg = PTy->getElementType();
+  Type *Agg = PTy->getElementType();
 
   // Handle the special case of the empty set index set, which is always valid.
   if (NumIdx == 0)
     return Agg;
   
   // If there is at least one index, the top level type must be sized, otherwise
-  // it cannot be 'stepped over'.  We explicitly allow abstract types (those
-  // that contain opaque types) under the assumption that it will be resolved to
-  // a sane type later.
-  if (!Agg->isSized() && !Agg->isAbstract())
+  // it cannot be 'stepped over'.
+  if (!Agg->isSized())
     return 0;
 
   unsigned CurIdx = 1;
   for (; CurIdx != NumIdx; ++CurIdx) {
-    const CompositeType *CT = dyn_cast<CompositeType>(Agg);
+    CompositeType *CT = dyn_cast<CompositeType>(Agg);
     if (!CT || CT->isPointerTy()) return 0;
     IndexTy Index = Idxs[CurIdx];
     if (!CT->indexValid(Index)) return 0;
     Agg = CT->getTypeAtIndex(Index);
-
-    // If the new type forwards to another type, then it is in the middle
-    // of being refined to another type (and hence, may have dropped all
-    // references to what it was using before).  So, use the new forwarded
-    // type.
-    if (const Type *Ty = Agg->getForwardedType())
-      Agg = Ty;
   }
   return CurIdx == NumIdx ? Agg : 0;
 }
 
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
-                                              Value* const *Idxs,
-                                              unsigned NumIdx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs,
+                                        unsigned NumIdx) {
   return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
 }
 
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
-                                              Constant* const *Idxs,
-                                              unsigned NumIdx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+                                        Constant* const *Idxs,
+                                        unsigned NumIdx) {
   return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
 }
 
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
-                                              uint64_t const *Idxs,
-                                              unsigned NumIdx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+                                        uint64_t const *Idxs,
+                                        unsigned NumIdx) {
   return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
 }
 
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
   const PointerType *PTy = dyn_cast<PointerType>(Ptr);
   if (!PTy) return 0;   // Type isn't a pointer type!
 
@@ -1390,27 +1327,22 @@ int ShuffleVectorInst::getMaskValue(unsigned i) const {
 //                             InsertValueInst Class
 //===----------------------------------------------------------------------===//
 
-void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx, 
-                           unsigned NumIdx, const Twine &Name) {
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, 
+                           const Twine &Name) {
   assert(NumOperands == 2 && "NumOperands not initialized?");
-  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx, Idx + NumIdx) ==
-         Val->getType() && "Inserted value must match indexed type!");
-  Op<0>() = Agg;
-  Op<1>() = Val;
 
-  Indices.append(Idx, Idx + NumIdx);
-  setName(Name);
-}
+  // There's no fundamental reason why we require at least one index
+  // (other than weirdness with &*IdxBegin being invalid; see
+  // getelementptr's init routine for example). But there's no
+  // present need to support it.
+  assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
 
-void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx, 
-                           const Twine &Name) {
-  assert(NumOperands == 2 && "NumOperands not initialized?");
-  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx) == Val->getType()
-         && "Inserted value must match indexed type!");
+  assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
+         Val->getType() && "Inserted value must match indexed type!");
   Op<0>() = Agg;
   Op<1>() = Val;
 
-  Indices.push_back(Idx);
+  Indices.append(Idxs.begin(), Idxs.end());
   setName(Name);
 }
 
@@ -1423,44 +1355,18 @@ InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
   SubclassOptionalData = IVI.SubclassOptionalData;
 }
 
-InsertValueInst::InsertValueInst(Value *Agg,
-                                 Value *Val,
-                                 unsigned Idx, 
-                                 const Twine &Name,
-                                 Instruction *InsertBefore)
-  : Instruction(Agg->getType(), InsertValue,
-                OperandTraits<InsertValueInst>::op_begin(this),
-                2, InsertBefore) {
-  init(Agg, Val, Idx, Name);
-}
-
-InsertValueInst::InsertValueInst(Value *Agg,
-                                 Value *Val,
-                                 unsigned Idx, 
-                                 const Twine &Name,
-                                 BasicBlock *InsertAtEnd)
-  : Instruction(Agg->getType(), InsertValue,
-                OperandTraits<InsertValueInst>::op_begin(this),
-                2, InsertAtEnd) {
-  init(Agg, Val, Idx, Name);
-}
-
 //===----------------------------------------------------------------------===//
 //                             ExtractValueInst Class
 //===----------------------------------------------------------------------===//
 
-void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx,
-                            const Twine &Name) {
+void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
   assert(NumOperands == 1 && "NumOperands not initialized?");
 
-  Indices.append(Idx, Idx + NumIdx);
-  setName(Name);
-}
-
-void ExtractValueInst::init(unsigned Idx, const Twine &Name) {
-  assert(NumOperands == 1 && "NumOperands not initialized?");
+  // There's no fundamental reason why we require at least one index.
+  // But there's no present need to support it.
+  assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
 
-  Indices.push_back(Idx);
+  Indices.append(Idxs.begin(), Idxs.end());
   setName(Name);
 }
 
@@ -1476,10 +1382,9 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
 // A null type is returned if the indices are invalid for the specified
 // pointer type.
 //
-const Type* ExtractValueInst::getIndexedType(const Type *Agg,
-                                             const unsigned *Idxs,
-                                             unsigned NumIdx) {
-  for (unsigned CurIdx = 0; CurIdx != NumIdx; ++CurIdx) {
+Type *ExtractValueInst::getIndexedType(const Type *Agg,
+                                       ArrayRef<unsigned> Idxs) {
+  for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
     unsigned Index = Idxs[CurIdx];
     // We can't use CompositeType::indexValid(Index) here.
     // indexValid() always returns true for arrays because getelementptr allows
@@ -1499,20 +1404,8 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
     }
 
     Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
-
-    // If the new type forwards to another type, then it is in the middle
-    // of being refined to another type (and hence, may have dropped all
-    // references to what it was using before).  So, use the new forwarded
-    // type.
-    if (const Type *Ty = Agg->getForwardedType())
-      Agg = Ty;
   }
-  return Agg;
-}
-
-const Type* ExtractValueInst::getIndexedType(const Type *Agg,
-                                             unsigned Idx) {
-  return getIndexedType(Agg, &Idx, 1);
+  return const_cast<Type*>(Agg);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 1bd497d05d4e..ebd1e0aa1b0f 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -39,6 +39,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
   // Create the 'tbaa' metadata kind.
   unsigned TBAAID = getMDKindID("tbaa");
   assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
+
+  // Create the 'prof' metadata kind.
+  unsigned ProfID = getMDKindID("prof");
+  assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
 }
 LLVMContext::~LLVMContext() { delete pImpl; }
 
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index ccb8dc500fcd..504b37267f70 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -13,6 +13,7 @@
 
 #include "LLVMContextImpl.h"
 #include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -31,14 +32,10 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
     Int8Ty(C, 8),
     Int16Ty(C, 16),
     Int32Ty(C, 32),
-    Int64Ty(C, 64),
-    AlwaysOpaqueTy(new OpaqueType(C)) {
+    Int64Ty(C, 64) {
   InlineAsmDiagHandler = 0;
   InlineAsmDiagContext = 0;
-      
-  // Make sure the AlwaysOpaqueTy stays alive as long as the Context.
-  AlwaysOpaqueTy->addRef();
-  OpaqueTypes.insert(AlwaysOpaqueTy);
+  NamedStructTypesUniqueID = 0;
 }
 
 namespace {
@@ -58,9 +55,7 @@ LLVMContextImpl::~LLVMContextImpl() {
   // will try to remove itself from OwnedModules set.  This would cause
   // iterator invalidation if we iterated on the set directly.
   std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
-  for (std::vector<Module*>::iterator I = Modules.begin(), E = Modules.end();
-       I != E; ++I)
-    delete *I;
+  DeleteContainerPointers(Modules);
   
   std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
                 DropReferences());
@@ -78,38 +73,22 @@ LLVMContextImpl::~LLVMContextImpl() {
   NullPtrConstants.freeConstants();
   UndefValueConstants.freeConstants();
   InlineAsms.freeConstants();
-  for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end(); 
-       I != E; ++I) {
-    delete I->second;
-  }
-  for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end(); 
-       I != E; ++I) {
-    delete I->second;
-  }
-  AlwaysOpaqueTy->dropRef();
-  for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end();
-       I != E; ++I) {
-    (*I)->AbstractTypeUsers.clear();
-    delete *I;
-  }
+  DeleteContainerSeconds(IntConstants);
+  DeleteContainerSeconds(FPConstants);
+  
   // Destroy MDNodes.  ~MDNode can move and remove nodes between the MDNodeSet
   // and the NonUniquedMDNodes sets, so copy the values out first.
   SmallVector<MDNode*, 8> MDNodes;
   MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
   for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
-       I != E; ++I) {
+       I != E; ++I)
     MDNodes.push_back(&*I);
-  }
   MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
   for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
-         E = MDNodes.end(); I != E; ++I) {
+         E = MDNodes.end(); I != E; ++I)
     (*I)->destroy();
-  }
   assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
          "Destroying all MDNodes didn't empty the Context's sets.");
   // Destroy MDStrings.
-  for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
-         E = MDStringCache.end(); I != E; ++I) {
-    delete I->second;
-  }
+  DeleteContainerSeconds(MDStringCache);
 }
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 6ea4b48e79b7..06a6f2a25a38 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -15,17 +15,16 @@
 #ifndef LLVM_LLVMCONTEXT_IMPL_H
 #define LLVM_LLVMCONTEXT_IMPL_H
 
+#include "llvm/LLVMContext.h"
 #include "ConstantsContext.h"
 #include "LeaksContext.h"
-#include "TypesContext.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Metadata.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -139,27 +138,30 @@ public:
   // on Context destruction.
   SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
   
-  ConstantUniqueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
+  ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
 
-  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayType,
-    ConstantArray, true /*largekey*/> ArrayConstantsTy;
+  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+    ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
   ArrayConstantsTy ArrayConstants;
   
-  typedef ConstantUniqueMap<std::vector<Constant*>, StructType,
-    ConstantStruct, true /*largekey*/> StructConstantsTy;
+  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+    StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
   StructConstantsTy StructConstants;
   
-  typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
-                            ConstantVector> VectorConstantsTy;
+  typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+                            VectorType, ConstantVector> VectorConstantsTy;
   VectorConstantsTy VectorConstants;
   
-  ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
-  ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants;
+  ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
+    NullPtrConstants;
+  ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
   
   DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
-  ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
+  ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+    ExprConstants;
 
-  ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm> InlineAsms;
+  ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+                    InlineAsm> InlineAsms;
   
   ConstantInt *TheTrueVal;
   ConstantInt *TheFalseVal;
@@ -167,41 +169,27 @@ public:
   LeakDetectorImpl<Value> LLVMObjects;
   
   // Basic type instances.
-  const Type VoidTy;
-  const Type LabelTy;
-  const Type FloatTy;
-  const Type DoubleTy;
-  const Type MetadataTy;
-  const Type X86_FP80Ty;
-  const Type FP128Ty;
-  const Type PPC_FP128Ty;
-  const Type X86_MMXTy;
-  const IntegerType Int1Ty;
-  const IntegerType Int8Ty;
-  const IntegerType Int16Ty;
-  const IntegerType Int32Ty;
-  const IntegerType Int64Ty;
-
-  // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
-  // for types as they are needed.  Because resolution of types must invalidate
-  // all of the abstract type descriptions, we keep them in a separate map to
-  // make this easy.
-  TypePrinting ConcreteTypeDescriptions;
-  TypePrinting AbstractTypeDescriptions;
-  
-  TypeMap<ArrayValType, ArrayType> ArrayTypes;
-  TypeMap<VectorValType, VectorType> VectorTypes;
-  TypeMap<PointerValType, PointerType> PointerTypes;
-  TypeMap<FunctionValType, FunctionType> FunctionTypes;
-  TypeMap<StructValType, StructType> StructTypes;
-  TypeMap<IntegerValType, IntegerType> IntegerTypes;
-
-  // Opaque types are not structurally uniqued, so don't use TypeMap.
-  typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy;
-  OpaqueTypesTy OpaqueTypes;
-
-  /// Used as an abstract type that will never be resolved.
-  OpaqueType *const AlwaysOpaqueTy;
+  Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy;
+  Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+  IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+
+  
+  /// TypeAllocator - All dynamically allocated types are allocated from this.
+  /// They live forever until the context is torn down.
+  BumpPtrAllocator TypeAllocator;
+  
+  DenseMap<unsigned, IntegerType*> IntegerTypes;
+  
+  // TODO: Optimize FunctionTypes/AnonStructTypes!
+  std::map<std::vector<Type*>, FunctionType*> FunctionTypes;
+  std::map<std::vector<Type*>, StructType*> AnonStructTypes;
+  StringMap<StructType*> NamedStructTypes;
+  unsigned NamedStructTypesUniqueID;
+    
+  DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
+  DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+  DenseMap<Type*, PointerType*> PointerTypes;  // Pointers in AddrSpace = 0
+  DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
 
 
   /// ValueHandles - This map keeps track of all of the value handles that are
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index eb719e54b289..ace4dc2de271 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
 #include "SymbolTableListTraitsImpl.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ValueHandle.h"
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 341e527acb5b..be2fcb8ac6c0 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -17,12 +17,12 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/GVMaterializer.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/LeakDetector.h"
 #include "SymbolTableListTraitsImpl.h"
-#include "llvm/TypeSymbolTable.h"
 #include <algorithm>
 #include <cstdarg>
 #include <cstdlib>
@@ -60,7 +60,6 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
 Module::Module(StringRef MID, LLVMContext& C)
   : Context(C), Materializer(NULL), ModuleID(MID) {
   ValSymTab = new ValueSymbolTable();
-  TypeSymTab = new TypeSymbolTable();
   NamedMDSymTab = new StringMap<NamedMDNode *>();
   Context.addModule(this);
 }
@@ -74,11 +73,10 @@ Module::~Module() {
   LibraryList.clear();
   NamedMDList.clear();
   delete ValSymTab;
-  delete TypeSymTab;
   delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
 }
 
-/// Target endian information...
+/// Target endian information.
 Module::Endianness Module::getEndianness() const {
   StringRef temp = DataLayout;
   Module::Endianness ret = AnyEndianness;
@@ -218,8 +216,8 @@ Constant *Module::getOrInsertFunction(StringRef Name,
   va_start(Args, RetTy);
 
   // Build the list of argument types...
-  std::vector<const Type*> ArgTys;
-  while (const Type *ArgTy = va_arg(Args, const Type*))
+  std::vector<Type*> ArgTys;
+  while (Type *ArgTy = va_arg(Args, Type*))
     ArgTys.push_back(ArgTy);
 
   va_end(Args);
@@ -236,8 +234,8 @@ Constant *Module::getOrInsertFunction(StringRef Name,
   va_start(Args, RetTy);
 
   // Build the list of argument types...
-  std::vector<const Type*> ArgTys;
-  while (const Type *ArgTy = va_arg(Args, const Type*))
+  std::vector<Type*> ArgTys;
+  while (Type *ArgTy = va_arg(Args, Type*))
     ArgTys.push_back(ArgTy);
 
   va_end(Args);
@@ -340,51 +338,6 @@ void Module::eraseNamedMetadata(NamedMDNode *NMD) {
   NamedMDList.erase(NMD);
 }
 
-//===----------------------------------------------------------------------===//
-// Methods for easy access to the types in the module.
-//
-
-
-// addTypeName - Insert an entry in the symbol table mapping Str to Type.  If
-// there is already an entry for this name, true is returned and the symbol
-// table is not modified.
-//
-bool Module::addTypeName(StringRef Name, const Type *Ty) {
-  TypeSymbolTable &ST = getTypeSymbolTable();
-
-  if (ST.lookup(Name)) return true;  // Already in symtab...
-
-  // Not in symbol table?  Set the name with the Symtab as an argument so the
-  // type knows what to update...
-  ST.insert(Name, Ty);
-
-  return false;
-}
-
-/// getTypeByName - Return the type with the specified name in this module, or
-/// null if there is none by that name.
-const Type *Module::getTypeByName(StringRef Name) const {
-  const TypeSymbolTable &ST = getTypeSymbolTable();
-  return cast_or_null<Type>(ST.lookup(Name));
-}
-
-// getTypeName - If there is at least one entry in the symbol table for the
-// specified type, return it.
-//
-std::string Module::getTypeName(const Type *Ty) const {
-  const TypeSymbolTable &ST = getTypeSymbolTable();
-
-  TypeSymbolTable::const_iterator TI = ST.begin();
-  TypeSymbolTable::const_iterator TE = ST.end();
-  if ( TI == TE ) return ""; // No names for types
-
-  while (TI != TE && TI->second != Ty)
-    ++TI;
-
-  if (TI != TE)  // Must have found an entry!
-    return TI->first;
-  return "";     // Must not have found anything...
-}
 
 //===----------------------------------------------------------------------===//
 // Methods to control the materialization of GlobalValues in the Module.
@@ -471,3 +424,130 @@ void Module::removeLibrary(StringRef Lib) {
       return;
     }
 }
+
+//===----------------------------------------------------------------------===//
+// Type finding functionality.
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// TypeFinder - Walk over a module, collecting all of the struct types that
+  /// are used by the module.
+  class TypeFinder {
+    // To avoid walking constant expressions and other IR objects multiple
+    // times, we keep sets of the values and types already visited.
+    DenseSet<const Value*> VisitedConstants;
+    DenseSet<const Type*> VisitedTypes;
+    
+    std::vector<StructType*> &StructTypes;
+  public:
+    TypeFinder(std::vector<StructType*> &structTypes)
+      : StructTypes(structTypes) {}
+    
+    void run(const Module &M) {
+      // Get types from global variables.
+      for (Module::const_global_iterator I = M.global_begin(),
+           E = M.global_end(); I != E; ++I) {
+        incorporateType(I->getType());
+        if (I->hasInitializer())
+          incorporateValue(I->getInitializer());
+      }
+      
+      // Get types from aliases.
+      for (Module::const_alias_iterator I = M.alias_begin(),
+           E = M.alias_end(); I != E; ++I) {
+        incorporateType(I->getType());
+        if (const Value *Aliasee = I->getAliasee())
+          incorporateValue(Aliasee);
+      }
+      
+      SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+      // Get types from functions.
+      for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+        incorporateType(FI->getType());
+        
+        for (Function::const_iterator BB = FI->begin(), E = FI->end();
+             BB != E;++BB)
+          for (BasicBlock::const_iterator II = BB->begin(),
+               E = BB->end(); II != E; ++II) {
+            const Instruction &I = *II;
+            // Incorporate the type of the instruction and all its operands.
+            incorporateType(I.getType());
+            for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+                 OI != OE; ++OI)
+              incorporateValue(*OI);
+            
+            // Incorporate types hiding in metadata.
+            I.getAllMetadataOtherThanDebugLoc(MDForInst);
+            for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+              incorporateMDNode(MDForInst[i].second);
+            MDForInst.clear();
+          }
+      }
+      // Get types hiding in named metadata.
+      for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+           E = M.named_metadata_end(); I != E; ++I) {
+        const NamedMDNode *NMD = I;
+        for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+          incorporateMDNode(NMD->getOperand(i));
+      }
+    }
+    
+  private:
+    void incorporateType(Type *Ty) {
+      // Check to see if we've already visited this type.
+      if (!VisitedTypes.insert(Ty).second)
+        return;
+      
+      // If this is a struct type, record it in the caller's result vector.
+      if (StructType *STy = dyn_cast<StructType>(Ty))
+        StructTypes.push_back(STy);
+      
+      // Recursively walk all contained types.
+      for (Type::subtype_iterator I = Ty->subtype_begin(),
+           E = Ty->subtype_end(); I != E; ++I)
+        incorporateType(*I);
+    }
+    
+    /// incorporateValue - This method is used to walk operand lists finding
+    /// types hiding in constant expressions and other operands that won't be
+    /// walked in other ways.  GlobalValues, basic blocks, instructions, and
+    /// inst operands are all explicitly enumerated.
+    void incorporateValue(const Value *V) {
+      if (const MDNode *M = dyn_cast<MDNode>(V))
+        return incorporateMDNode(M);
+      if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+      
+      // Already visited?
+      if (!VisitedConstants.insert(V).second)
+        return;
+      
+      // Check this type.
+      incorporateType(V->getType());
+      
+      // Look in operands for types.
+      const User *U = cast<User>(V);
+      for (Constant::const_op_iterator I = U->op_begin(),
+           E = U->op_end(); I != E;++I)
+        incorporateValue(*I);
+    }
+    
+    void incorporateMDNode(const MDNode *V) {
+      
+      // Already visited?
+      if (!VisitedConstants.insert(V).second)
+        return;
+      
+      // Look in operands for types.
+      for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+        if (Value *Op = V->getOperand(i))
+          incorporateValue(Op);
+    }
+  };
+} // end anonymous namespace
+
+void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const {
+  TypeFinder(StructTypes).run(*this);
+}
+
+
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index e4496db4317f..f874d1b28302 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -12,95 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLVMContextImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Threading.h"
+#include "llvm/Module.h"
 #include <algorithm>
 #include <cstdarg>
+#include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
-// DEBUG_MERGE_TYPES - Enable this #define to see how and when derived types are
-// created and later destroyed, all in an effort to make sure that there is only
-// a single canonical version of a type.
-//
-// #define DEBUG_MERGE_TYPES 1
-
-AbstractTypeUser::~AbstractTypeUser() {}
-
-void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
-  V->VTy = NewTy;
-}
-
 //===----------------------------------------------------------------------===//
 //                         Type Class Implementation
 //===----------------------------------------------------------------------===//
 
-/// Because of the way Type subclasses are allocated, this function is necessary
-/// to use the correct kind of "delete" operator to deallocate the Type object.
-/// Some type objects (FunctionTy, StructTy) allocate additional space
-/// after the space for their derived type to hold the contained types array of
-/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are
-/// allocated with the type object, decreasing allocations and eliminating the
-/// need for a std::vector to be used in the Type class itself. 
-/// @brief Type destruction function
-void Type::destroy() const {
-  // Nothing calls getForwardedType from here on.
-  if (ForwardType && ForwardType->isAbstract()) {
-    ForwardType->dropRef();
-    ForwardType = NULL;
-  }
-
-  // Structures and Functions allocate their contained types past the end of
-  // the type object itself. These need to be destroyed differently than the
-  // other types.
-  if (this->isFunctionTy() || this->isStructTy()) {
-    // First, make sure we destruct any PATypeHandles allocated by these
-    // subclasses.  They must be manually destructed. 
-    for (unsigned i = 0; i < NumContainedTys; ++i)
-      ContainedTys[i].PATypeHandle::~PATypeHandle();
-
-    // Now call the destructor for the subclass directly because we're going
-    // to delete this as an array of char.
-    if (this->isFunctionTy())
-      static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
-    else {
-      assert(isStructTy());
-      static_cast<const StructType*>(this)->StructType::~StructType();
-    }
-
-    // Finally, remove the memory as an array deallocation of the chars it was
-    // constructed from.
-    operator delete(const_cast<Type *>(this));
-
-    return;
-  } else if (const OpaqueType *opaque_this = dyn_cast<OpaqueType>(this)) {
-    LLVMContextImpl *pImpl = this->getContext().pImpl;
-    pImpl->OpaqueTypes.erase(opaque_this);
-  }
-
-  // For all the other type subclasses, there is either no contained types or 
-  // just one (all Sequentials). For Sequentials, the PATypeHandle is not
-  // allocated past the type object, its included directly in the SequentialType
-  // class. This means we can safely just do "normal" delete of this object and
-  // all the destructors that need to run will be run.
-  delete this; 
-}
-
-const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
+Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
   switch (IDNumber) {
   case VoidTyID      : return getVoidTy(C);
   case FloatTyID     : return getFloatTy(C);
@@ -116,15 +38,6 @@ const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
   }
 }
 
-const Type *Type::getVAArgsPromotedType(LLVMContext &C) const {
-  if (ID == IntegerTyID && getSubclassData() < 32)
-    return Type::getInt32Ty(C);
-  else if (ID == FloatTyID)
-    return Type::getDoubleTy(C);
-  else
-    return this;
-}
-
 /// getScalarType - If this is a vector type, return the element type,
 /// otherwise return this.
 const Type *Type::getScalarType() const {
@@ -262,13 +175,17 @@ bool Type::isSizedDerivedType() const {
   if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
     return ATy->getElementType()->isSized();
 
-  if (const VectorType *PTy = dyn_cast<VectorType>(this))
-    return PTy->getElementType()->isSized();
+  if (const VectorType *VTy = dyn_cast<VectorType>(this))
+    return VTy->getElementType()->isSized();
 
   if (!this->isStructTy()) 
     return false;
 
-  // Okay, our struct is sized if all of the elements are...
+  // Opaque structs have no size.
+  if (cast<StructType>(this)->isOpaque())
+    return false;
+  
+  // Okay, our struct is sized if all of the elements are.
   for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
     if (!(*I)->isSized()) 
       return false;
@@ -276,696 +193,335 @@ bool Type::isSizedDerivedType() const {
   return true;
 }
 
-/// getForwardedTypeInternal - This method is used to implement the union-find
-/// algorithm for when a type is being forwarded to another type.
-const Type *Type::getForwardedTypeInternal() const {
-  assert(ForwardType && "This type is not being forwarded to another type!");
-
-  // Check to see if the forwarded type has been forwarded on.  If so, collapse
-  // the forwarding links.
-  const Type *RealForwardedType = ForwardType->getForwardedType();
-  if (!RealForwardedType)
-    return ForwardType;  // No it's not forwarded again
-
-  // Yes, it is forwarded again.  First thing, add the reference to the new
-  // forward type.
-  if (RealForwardedType->isAbstract())
-    RealForwardedType->addRef();
-
-  // Now drop the old reference.  This could cause ForwardType to get deleted.
-  // ForwardType must be abstract because only abstract types can have their own
-  // ForwardTypes.
-  ForwardType->dropRef();
-
-  // Return the updated type.
-  ForwardType = RealForwardedType;
-  return ForwardType;
-}
-
-void Type::refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
-  llvm_unreachable("Attempting to refine a derived type!");
-}
-void Type::typeBecameConcrete(const DerivedType *AbsTy) {
-  llvm_unreachable("DerivedType is already a concrete type!");
-}
-
-
-std::string Type::getDescription() const {
-  LLVMContextImpl *pImpl = getContext().pImpl;
-  TypePrinting &Map =
-    isAbstract() ?
-      pImpl->AbstractTypeDescriptions :
-      pImpl->ConcreteTypeDescriptions;
-  
-  std::string DescStr;
-  raw_string_ostream DescOS(DescStr);
-  Map.print(this, DescOS);
-  return DescOS.str();
-}
-
-
-bool StructType::indexValid(const Value *V) const {
-  // Structure indexes require 32-bit integer constants.
-  if (V->getType()->isIntegerTy(32))
-    if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
-      return indexValid(CU->getZExtValue());
-  return false;
-}
-
-bool StructType::indexValid(unsigned V) const {
-  return V < NumContainedTys;
-}
-
-// getTypeAtIndex - Given an index value into the type, return the type of the
-// element.  For a structure type, this must be a constant value...
-//
-const Type *StructType::getTypeAtIndex(const Value *V) const {
-  unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
-  return getTypeAtIndex(Idx);
-}
-
-const Type *StructType::getTypeAtIndex(unsigned Idx) const {
-  assert(indexValid(Idx) && "Invalid structure index!");
-  return ContainedTys[Idx];
-}
-
-
 //===----------------------------------------------------------------------===//
 //                          Primitive 'Type' data
 //===----------------------------------------------------------------------===//
 
-const Type *Type::getVoidTy(LLVMContext &C) {
-  return &C.pImpl->VoidTy;
-}
-
-const Type *Type::getLabelTy(LLVMContext &C) {
-  return &C.pImpl->LabelTy;
-}
-
-const Type *Type::getFloatTy(LLVMContext &C) {
-  return &C.pImpl->FloatTy;
-}
-
-const Type *Type::getDoubleTy(LLVMContext &C) {
-  return &C.pImpl->DoubleTy;
-}
-
-const Type *Type::getMetadataTy(LLVMContext &C) {
-  return &C.pImpl->MetadataTy;
-}
-
-const Type *Type::getX86_FP80Ty(LLVMContext &C) {
-  return &C.pImpl->X86_FP80Ty;
-}
-
-const Type *Type::getFP128Ty(LLVMContext &C) {
-  return &C.pImpl->FP128Ty;
-}
-
-const Type *Type::getPPC_FP128Ty(LLVMContext &C) {
-  return &C.pImpl->PPC_FP128Ty;
-}
-
-const Type *Type::getX86_MMXTy(LLVMContext &C) {
-  return &C.pImpl->X86_MMXTy;
-}
-
-const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
+Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
+Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
+Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
+Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
+Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
+Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+
+IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
+IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
+IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
+IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
+IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+
+IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
   return IntegerType::get(C, N);
 }
 
-const IntegerType *Type::getInt1Ty(LLVMContext &C) {
-  return &C.pImpl->Int1Ty;
-}
-
-const IntegerType *Type::getInt8Ty(LLVMContext &C) {
-  return &C.pImpl->Int8Ty;
-}
-
-const IntegerType *Type::getInt16Ty(LLVMContext &C) {
-  return &C.pImpl->Int16Ty;
-}
-
-const IntegerType *Type::getInt32Ty(LLVMContext &C) {
-  return &C.pImpl->Int32Ty;
-}
-
-const IntegerType *Type::getInt64Ty(LLVMContext &C) {
-  return &C.pImpl->Int64Ty;
-}
-
-const PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
   return getFloatTy(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
   return getDoubleTy(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
   return getX86_FP80Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
   return getFP128Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
   return getPPC_FP128Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
   return getX86_MMXTy(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
   return getIntNTy(C, N)->getPointerTo(AS);
 }
 
-const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
   return getInt1Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
   return getInt8Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
   return getInt16Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
   return getInt32Ty(C)->getPointerTo(AS);
 }
 
-const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
   return getInt64Ty(C)->getPointerTo(AS);
 }
 
+
 //===----------------------------------------------------------------------===//
-//                          Derived Type Constructors
+//                       IntegerType Implementation
 //===----------------------------------------------------------------------===//
 
-/// isValidReturnType - Return true if the specified type is valid as a return
-/// type.
-bool FunctionType::isValidReturnType(const Type *RetTy) {
-  return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
-         !RetTy->isMetadataTy();
+IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
+  assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+  assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+  
+  // Check for the built-in integer types
+  switch (NumBits) {
+  case  1: return cast<IntegerType>(Type::getInt1Ty(C));
+  case  8: return cast<IntegerType>(Type::getInt8Ty(C));
+  case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+  case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+  case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+  default: 
+    break;
+  }
+  
+  IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
+  
+  if (Entry == 0)
+    Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+  
+  return Entry;
 }
 
-/// isValidArgumentType - Return true if the specified type is valid as an
-/// argument type.
-bool FunctionType::isValidArgumentType(const Type *ArgTy) {
-  return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy();
+bool IntegerType::isPowerOf2ByteWidth() const {
+  unsigned BitWidth = getBitWidth();
+  return (BitWidth > 7) && isPowerOf2_32(BitWidth);
 }
 
-FunctionType::FunctionType(const Type *Result,
-                           ArrayRef<const Type*> Params,
+APInt IntegerType::getMask() const {
+  return APInt::getAllOnesValue(getBitWidth());
+}
+
+//===----------------------------------------------------------------------===//
+//                       FunctionType Implementation
+//===----------------------------------------------------------------------===//
+
+FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params,
                            bool IsVarArgs)
-  : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) {
-  ContainedTys = reinterpret_cast<PATypeHandle*>(this+1);
-  NumContainedTys = Params.size() + 1; // + 1 for result type
+  : Type(Result->getContext(), FunctionTyID) {
+  Type **SubTys = reinterpret_cast<Type**>(this+1);
   assert(isValidReturnType(Result) && "invalid return type for function");
+  setSubclassData(IsVarArgs);
 
+  SubTys[0] = const_cast<Type*>(Result);
 
-  bool isAbstract = Result->isAbstract();
-  new (&ContainedTys[0]) PATypeHandle(Result, this);
-
-  for (unsigned i = 0; i != Params.size(); ++i) {
+  for (unsigned i = 0, e = Params.size(); i != e; ++i) {
     assert(isValidArgumentType(Params[i]) &&
            "Not a valid type for function argument!");
-    new (&ContainedTys[i+1]) PATypeHandle(Params[i], this);
-    isAbstract |= Params[i]->isAbstract();
+    SubTys[i+1] = Params[i];
   }
 
-  // Calculate whether or not this type is abstract
-  setAbstract(isAbstract);
+  ContainedTys = SubTys;
+  NumContainedTys = Params.size() + 1; // + 1 for result type
 }
 
-StructType::StructType(LLVMContext &C, 
-                       ArrayRef<const Type*> Types, bool isPacked)
-  : CompositeType(C, StructTyID) {
-  ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
-  NumContainedTys = Types.size();
-  setSubclassData(isPacked);
-  bool isAbstract = false;
-  for (unsigned i = 0; i < Types.size(); ++i) {
-    assert(Types[i] && "<null> type for structure field!");
-    assert(isValidElementType(Types[i]) &&
-           "Invalid type for structure element!");
-    new (&ContainedTys[i]) PATypeHandle(Types[i], this);
-    isAbstract |= Types[i]->isAbstract();
+// FunctionType::get - The factory function for the FunctionType class.
+FunctionType *FunctionType::get(const Type *ReturnType,
+                                ArrayRef<Type*> Params, bool isVarArg) {
+  // TODO: This is brutally slow.
+  std::vector<Type*> Key;
+  Key.reserve(Params.size()+2);
+  Key.push_back(const_cast<Type*>(ReturnType));
+  for (unsigned i = 0, e = Params.size(); i != e; ++i)
+    Key.push_back(const_cast<Type*>(Params[i]));
+  if (isVarArg)
+    Key.push_back(0);
+  
+  LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+  FunctionType *&FT = pImpl->FunctionTypes[Key];
+  
+  if (FT == 0) {
+    FT = (FunctionType*) pImpl->TypeAllocator.
+      Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1),
+               AlignOf<FunctionType>::Alignment);
+    new (FT) FunctionType(ReturnType, Params, isVarArg);
   }
 
-  // Calculate whether or not this type is abstract
-  setAbstract(isAbstract);
-}
-
-ArrayType::ArrayType(const Type *ElType, uint64_t NumEl)
-  : SequentialType(ArrayTyID, ElType) {
-  NumElements = NumEl;
-
-  // Calculate whether or not this type is abstract
-  setAbstract(ElType->isAbstract());
-}
-
-VectorType::VectorType(const Type *ElType, unsigned NumEl)
-  : SequentialType(VectorTyID, ElType) {
-  NumElements = NumEl;
-  setAbstract(ElType->isAbstract());
-  assert(NumEl > 0 && "NumEl of a VectorType must be greater than 0");
-  assert(isValidElementType(ElType) &&
-         "Elements of a VectorType must be a primitive type");
-
-}
-
-
-PointerType::PointerType(const Type *E, unsigned AddrSpace)
-  : SequentialType(PointerTyID, E) {
-  AddressSpace = AddrSpace;
-  // Calculate whether or not this type is abstract
-  setAbstract(E->isAbstract());
-}
-
-OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) {
-  setAbstract(true);
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << *this << "\n");
-#endif
+  return FT;
 }
 
-void PATypeHolder::destroy() {
-  Ty = 0;
-}
 
-// dropAllTypeUses - When this (abstract) type is resolved to be equal to
-// another (more concrete) type, we must eliminate all references to other
-// types, to avoid some circular reference problems.
-void DerivedType::dropAllTypeUses() {
-  if (NumContainedTys != 0) {
-    // The type must stay abstract.  To do this, we insert a pointer to a type
-    // that will never get resolved, thus will always be abstract.
-    ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy;
-
-    // Change the rest of the types to be Int32Ty's.  It doesn't matter what we
-    // pick so long as it doesn't point back to this type.  We choose something
-    // concrete to avoid overhead for adding to AbstractTypeUser lists and
-    // stuff.
-    const Type *ConcreteTy = Type::getInt32Ty(getContext());
-    for (unsigned i = 1, e = NumContainedTys; i != e; ++i)
-      ContainedTys[i] = ConcreteTy;
-  }
+FunctionType *FunctionType::get(const Type *Result, bool isVarArg) {
+  return get(Result, ArrayRef<Type *>(), isVarArg);
 }
 
 
-namespace {
-
-/// TypePromotionGraph and graph traits - this is designed to allow us to do
-/// efficient SCC processing of type graphs.  This is the exact same as
-/// GraphTraits<Type*>, except that we pretend that concrete types have no
-/// children to avoid processing them.
-struct TypePromotionGraph {
-  Type *Ty;
-  TypePromotionGraph(Type *T) : Ty(T) {}
-};
-
-}
-
-namespace llvm {
-  template <> struct GraphTraits<TypePromotionGraph> {
-    typedef Type NodeType;
-    typedef Type::subtype_iterator ChildIteratorType;
-
-    static inline NodeType *getEntryNode(TypePromotionGraph G) { return G.Ty; }
-    static inline ChildIteratorType child_begin(NodeType *N) {
-      if (N->isAbstract())
-        return N->subtype_begin();
-      // No need to process children of concrete types.
-      return N->subtype_end();
-    }
-    static inline ChildIteratorType child_end(NodeType *N) {
-      return N->subtype_end();
-    }
-  };
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(const Type *RetTy) {
+  return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+  !RetTy->isMetadataTy();
 }
 
-
-// PromoteAbstractToConcrete - This is a recursive function that walks a type
-// graph calculating whether or not a type is abstract.
-//
-void Type::PromoteAbstractToConcrete() {
-  if (!isAbstract()) return;
-
-  scc_iterator<TypePromotionGraph> SI = scc_begin(TypePromotionGraph(this));
-  scc_iterator<TypePromotionGraph> SE = scc_end  (TypePromotionGraph(this));
-
-  for (; SI != SE; ++SI) {
-    std::vector<Type*> &SCC = *SI;
-
-    // Concrete types are leaves in the tree.  Since an SCC will either be all
-    // abstract or all concrete, we only need to check one type.
-    if (!SCC[0]->isAbstract()) continue;
-    
-    if (SCC[0]->isOpaqueTy())
-      return;     // Not going to be concrete, sorry.
-
-    // If all of the children of all of the types in this SCC are concrete,
-    // then this SCC is now concrete as well.  If not, neither this SCC, nor
-    // any parent SCCs will be concrete, so we might as well just exit.
-    for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-      for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
-             E = SCC[i]->subtype_end(); CI != E; ++CI)
-        if ((*CI)->isAbstract())
-          // If the child type is in our SCC, it doesn't make the entire SCC
-          // abstract unless there is a non-SCC abstract type.
-          if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
-            return;               // Not going to be concrete, sorry.
-
-    // Okay, we just discovered this whole SCC is now concrete, mark it as
-    // such!
-    for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-      assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
-
-      SCC[i]->setAbstract(false);
-    }
-
-    for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
-      assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
-      // The type just became concrete, notify all users!
-      cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
-    }
-  }
+/// isValidArgumentType - Return true if the specified type is valid as an
+/// argument type.
+bool FunctionType::isValidArgumentType(const Type *ArgTy) {
+  return ArgTy->isFirstClassType();
 }
 
-
 //===----------------------------------------------------------------------===//
-//                      Type Structural Equality Testing
+//                       StructType Implementation
 //===----------------------------------------------------------------------===//
 
-// TypesEqual - Two types are considered structurally equal if they have the
-// same "shape": Every level and element of the types have identical primitive
-// ID's, and the graphs have the same edges/nodes in them.  Nodes do not have to
-// be pointer equals to be equivalent though.  This uses an optimistic algorithm
-// that assumes that two graphs are the same until proven otherwise.
-//
-static bool TypesEqual(const Type *Ty, const Type *Ty2,
-                       std::map<const Type *, const Type *> &EqTypes) {
-  if (Ty == Ty2) return true;
-  if (Ty->getTypeID() != Ty2->getTypeID()) return false;
-  if (Ty->isOpaqueTy())
-    return false;  // Two unequal opaque types are never equal
-
-  std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty);
-  if (It != EqTypes.end())
-    return It->second == Ty2;    // Looping back on a type, check for equality
-
-  // Otherwise, add the mapping to the table to make sure we don't get
-  // recursion on the types...
-  EqTypes.insert(It, std::make_pair(Ty, Ty2));
-
-  // Two really annoying special cases that breaks an otherwise nice simple
-  // algorithm is the fact that arraytypes have sizes that differentiates types,
-  // and that function types can be varargs or not.  Consider this now.
-  //
-  if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
-    const IntegerType *ITy2 = cast<IntegerType>(Ty2);
-    return ITy->getBitWidth() == ITy2->getBitWidth();
-  }
-  
-  if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
-    const PointerType *PTy2 = cast<PointerType>(Ty2);
-    return PTy->getAddressSpace() == PTy2->getAddressSpace() &&
-           TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
+// Primitive Constructors.
+
+StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes, 
+                            bool isPacked) {
+  // FIXME: std::vector is horribly inefficient for this probe.
+  std::vector<Type*> Key;
+  for (unsigned i = 0, e = ETypes.size(); i != e; ++i) {
+    assert(isValidElementType(ETypes[i]) &&
+           "Invalid type for structure element!");
+    Key.push_back(ETypes[i]);
   }
+  if (isPacked)
+    Key.push_back(0);
   
-  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
-    const StructType *STy2 = cast<StructType>(Ty2);
-    if (STy->getNumElements() != STy2->getNumElements()) return false;
-    if (STy->isPacked() != STy2->isPacked()) return false;
-    for (unsigned i = 0, e = STy2->getNumElements(); i != e; ++i)
-      if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes))
-        return false;
-    return true;
-  }
+  StructType *&ST = Context.pImpl->AnonStructTypes[Key];
+  if (ST) return ST;
   
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
-    const ArrayType *ATy2 = cast<ArrayType>(Ty2);
-    return ATy->getNumElements() == ATy2->getNumElements() &&
-           TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes);
-  }
+  // Value not found.  Create a new type!
+  ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+  ST->setSubclassData(SCDB_IsAnonymous);  // Anonymous struct.
+  ST->setBody(ETypes, isPacked);
+  return ST;
+}
+
+void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+  assert(isOpaque() && "Struct body already set!");
   
-  if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
-    const VectorType *PTy2 = cast<VectorType>(Ty2);
-    return PTy->getNumElements() == PTy2->getNumElements() &&
-           TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
-  }
+  setSubclassData(getSubclassData() | SCDB_HasBody);
+  if (isPacked)
+    setSubclassData(getSubclassData() | SCDB_Packed);
   
-  if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
-    const FunctionType *FTy2 = cast<FunctionType>(Ty2);
-    if (FTy->isVarArg() != FTy2->isVarArg() ||
-        FTy->getNumParams() != FTy2->getNumParams() ||
-        !TypesEqual(FTy->getReturnType(), FTy2->getReturnType(), EqTypes))
-      return false;
-    for (unsigned i = 0, e = FTy2->getNumParams(); i != e; ++i) {
-      if (!TypesEqual(FTy->getParamType(i), FTy2->getParamType(i), EqTypes))
-        return false;
-    }
-    return true;
-  }
+  Type **Elts = getContext().pImpl->
+    TypeAllocator.Allocate<Type*>(Elements.size());
+  memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size());
   
-  llvm_unreachable("Unknown derived type!");
-  return false;
-}
-
-namespace llvm { // in namespace llvm so findable by ADL
-static bool TypesEqual(const Type *Ty, const Type *Ty2) {
-  std::map<const Type *, const Type *> EqTypes;
-  return ::TypesEqual(Ty, Ty2, EqTypes);
-}
-}
-
-// AbstractTypeHasCycleThrough - Return true there is a path from CurTy to
-// TargetTy in the type graph.  We know that Ty is an abstract type, so if we
-// ever reach a non-abstract type, we know that we don't need to search the
-// subgraph.
-static bool AbstractTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy,
-                                SmallPtrSet<const Type*, 128> &VisitedTypes) {
-  if (TargetTy == CurTy) return true;
-  if (!CurTy->isAbstract()) return false;
-
-  if (!VisitedTypes.insert(CurTy))
-    return false;  // Already been here.
-
-  for (Type::subtype_iterator I = CurTy->subtype_begin(),
-       E = CurTy->subtype_end(); I != E; ++I)
-    if (AbstractTypeHasCycleThrough(TargetTy, *I, VisitedTypes))
-      return true;
-  return false;
+  ContainedTys = Elts;
+  NumContainedTys = Elements.size();
 }
 
-static bool ConcreteTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy,
-                                SmallPtrSet<const Type*, 128> &VisitedTypes) {
-  if (TargetTy == CurTy) return true;
-
-  if (!VisitedTypes.insert(CurTy))
-    return false;  // Already been here.
-
-  for (Type::subtype_iterator I = CurTy->subtype_begin(),
-       E = CurTy->subtype_end(); I != E; ++I)
-    if (ConcreteTypeHasCycleThrough(TargetTy, *I, VisitedTypes))
-      return true;
-  return false;
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) {
+  StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+  if (!Name.empty())
+    ST->setName(Name);
+  return ST;
 }
 
-/// TypeHasCycleThroughItself - Return true if the specified type has
-/// a cycle back to itself.
+void StructType::setName(StringRef Name) {
+  if (Name == getName()) return;
 
-namespace llvm { // in namespace llvm so it's findable by ADL
-static bool TypeHasCycleThroughItself(const Type *Ty) {
-  SmallPtrSet<const Type*, 128> VisitedTypes;
-
-  if (Ty->isAbstract()) {  // Optimized case for abstract types.
-    for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
-         I != E; ++I)
-      if (AbstractTypeHasCycleThrough(Ty, *I, VisitedTypes))
-        return true;
-  } else {
-    for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
-         I != E; ++I)
-      if (ConcreteTypeHasCycleThrough(Ty, *I, VisitedTypes))
-        return true;
+  // If this struct already had a name, remove its symbol table entry.
+  if (SymbolTableEntry) {
+    getContext().pImpl->NamedStructTypes.erase(getName());
+    SymbolTableEntry = 0;
   }
-  return false;
-}
-}
-
-//===----------------------------------------------------------------------===//
-// Function Type Factory and Value Class...
-//
-const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
-  assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
-  assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
-
-  // Check for the built-in integer types
-  switch (NumBits) {
-  case  1: return cast<IntegerType>(Type::getInt1Ty(C));
-  case  8: return cast<IntegerType>(Type::getInt8Ty(C));
-  case 16: return cast<IntegerType>(Type::getInt16Ty(C));
-  case 32: return cast<IntegerType>(Type::getInt32Ty(C));
-  case 64: return cast<IntegerType>(Type::getInt64Ty(C));
-  default: 
-    break;
-  }
-
-  LLVMContextImpl *pImpl = C.pImpl;
   
-  IntegerValType IVT(NumBits);
-  IntegerType *ITy = 0;
+  // If this is just removing the name, we're done.
+  if (Name.empty())
+    return;
   
-  // First, see if the type is already in the table, for which
-  // a reader lock suffices.
-  ITy = pImpl->IntegerTypes.get(IVT);
-    
-  if (!ITy) {
-    // Value not found.  Derive a new type!
-    ITy = new IntegerType(C, NumBits);
-    pImpl->IntegerTypes.add(IVT, ITy);
+  // Look up the entry for the name.
+  StringMapEntry<StructType*> *Entry =
+    &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+  
+  // While we have a name collision, retry as "Name.<N>" with the next unique N.
+  if (Entry->getValue()) {
+    SmallString<64> TempStr(Name);
+    TempStr.push_back('.');
+    raw_svector_ostream TmpStream(TempStr);
+   
+    do {
+      TempStr.resize(Name.size()+1);
+      TmpStream.resync();
+      TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
+      
+      Entry = &getContext().pImpl->
+                 NamedStructTypes.GetOrCreateValue(TmpStream.str());
+    } while (Entry->getValue());
   }
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << *ITy << "\n");
-#endif
-  return ITy;
-}
 
-bool IntegerType::isPowerOf2ByteWidth() const {
-  unsigned BitWidth = getBitWidth();
-  return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+  // Okay, we found an entry that isn't used.  It's us!
+  Entry->setValue(this);
+    
+  SymbolTableEntry = Entry;
 }
 
-APInt IntegerType::getMask() const {
-  return APInt::getAllOnesValue(getBitWidth());
-}
+//===----------------------------------------------------------------------===//
+// StructType Helper functions.
 
-FunctionValType FunctionValType::get(const FunctionType *FT) {
-  // Build up a FunctionValType
-  std::vector<const Type *> ParamTypes;
-  ParamTypes.reserve(FT->getNumParams());
-  for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
-    ParamTypes.push_back(FT->getParamType(i));
-  return FunctionValType(FT->getReturnType(), ParamTypes, FT->isVarArg());
+StructType *StructType::get(LLVMContext &Context, bool isPacked) {
+  return get(Context, llvm::ArrayRef<Type*>(), isPacked);
 }
 
-
-// FunctionType::get - The factory function for the FunctionType class...
-FunctionType *FunctionType::get(const Type *ReturnType,
-                                ArrayRef<const Type*> Params,
-                                bool isVarArg) {
-  FunctionValType VT(ReturnType, Params, isVarArg);
-  FunctionType *FT = 0;
-  
-  LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
-  
-  FT = pImpl->FunctionTypes.get(VT);
-  
-  if (!FT) {
-    FT = (FunctionType*) operator new(sizeof(FunctionType) +
-                                    sizeof(PATypeHandle)*(Params.size()+1));
-    new (FT) FunctionType(ReturnType, Params, isVarArg);
-    pImpl->FunctionTypes.add(VT, FT);
+StructType *StructType::get(Type *type, ...) {
+  assert(type != 0 && "Cannot create a struct type with no elements with this");
+  LLVMContext &Ctx = type->getContext();
+  va_list ap;
+  SmallVector<llvm::Type*, 8> StructFields;
+  va_start(ap, type);
+  while (type) {
+    StructFields.push_back(type);
+    type = va_arg(ap, llvm::Type*);
   }
-
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << FT << "\n");
-#endif
-  return FT;
+  return llvm::StructType::get(Ctx, StructFields);
 }
 
-ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
-  assert(ElementType && "Can't get array of <null> types!");
-  assert(isValidElementType(ElementType) && "Invalid type for array element!");
-
-  ArrayValType AVT(ElementType, NumElements);
-  ArrayType *AT = 0;
-
-  LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
-  
-  AT = pImpl->ArrayTypes.get(AVT);
-      
-  if (!AT) {
-    // Value not found.  Derive a new type!
-    pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements));
-  }
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << *AT << "\n");
-#endif
-  return AT;
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name,
+                                    ArrayRef<Type*> Elements, bool isPacked) {
+  StructType *ST = createNamed(Context, Name);
+  ST->setBody(Elements, isPacked);
+  return ST;
 }
 
-bool ArrayType::isValidElementType(const Type *ElemTy) {
-  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
-         !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements,
+                                    bool isPacked) {
+  assert(!Elements.empty() &&
+         "This method may not be invoked with an empty list");
+  return createNamed(Elements[0]->getContext(), Name, Elements, isPacked);
 }
 
-VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
-  assert(ElementType && "Can't get vector of <null> types!");
-
-  VectorValType PVT(ElementType, NumElements);
-  VectorType *PT = 0;
-  
-  LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
-  
-  PT = pImpl->VectorTypes.get(PVT);
-    
-  if (!PT) {
-    pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements));
+StructType *StructType::createNamed(StringRef Name, Type *type, ...) {
+  assert(type != 0 && "Cannot create a struct type with no elements with this");
+  LLVMContext &Ctx = type->getContext();
+  va_list ap;
+  SmallVector<llvm::Type*, 8> StructFields;
+  va_start(ap, type);
+  while (type) {
+    StructFields.push_back(type);
+    type = va_arg(ap, llvm::Type*);
   }
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << *PT << "\n");
-#endif
-  return PT;
-}
-
-bool VectorType::isValidElementType(const Type *ElemTy) {
-  return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
-         ElemTy->isOpaqueTy();
+  return llvm::StructType::createNamed(Ctx, Name, StructFields);
 }
 
-//===----------------------------------------------------------------------===//
-// Struct Type Factory...
-//
-
-StructType *StructType::get(LLVMContext &Context,
-                            ArrayRef<const Type*> ETypes, 
-                            bool isPacked) {
-  StructValType STV(ETypes, isPacked);
-  StructType *ST = 0;
-  
-  LLVMContextImpl *pImpl = Context.pImpl;
+StringRef StructType::getName() const {
+  assert(!isAnonymous() && "Anonymous structs never have names");
+  if (SymbolTableEntry == 0) return StringRef();
   
-  ST = pImpl->StructTypes.get(STV);
-    
-  if (!ST) {
-    // Value not found.  Derive a new type!
-    ST = (StructType*) operator new(sizeof(StructType) +
-                                    sizeof(PATypeHandle) * ETypes.size());
-    new (ST) StructType(Context, ETypes, isPacked);
-    pImpl->StructTypes.add(STV, ST);
-  }
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << *ST << "\n");
-#endif
-  return ST;
+  return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
 }
 
-StructType *StructType::get(LLVMContext &Context, const Type *type, ...) {
+void StructType::setBody(Type *type, ...) {
+  assert(type != 0 && "Cannot create a struct type with no elements with this");
   va_list ap;
-  std::vector<const llvm::Type*> StructFields;
+  SmallVector<llvm::Type*, 8> StructFields;
   va_start(ap, type);
   while (type) {
     StructFields.push_back(type);
     type = va_arg(ap, llvm::Type*);
   }
-  return llvm::StructType::get(Context, StructFields);
+  setBody(StructFields);
 }
 
 bool StructType::isValidElementType(const Type *ElemTy) {
@@ -973,278 +529,159 @@ bool StructType::isValidElementType(const Type *ElemTy) {
          !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
 }
 
-
-//===----------------------------------------------------------------------===//
-// Pointer Type Factory...
-//
-
-PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
-  assert(ValueType && "Can't get a pointer to <null> type!");
-  assert(ValueType->getTypeID() != VoidTyID &&
-         "Pointer to void is not valid, use i8* instead!");
-  assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
-  PointerValType PVT(ValueType, AddressSpace);
-
-  PointerType *PT = 0;
-  
-  LLVMContextImpl *pImpl = ValueType->getContext().pImpl;
+/// isLayoutIdentical - Return true if this is layout identical to the
+/// specified struct.
+bool StructType::isLayoutIdentical(const StructType *Other) const {
+  if (this == Other) return true;
   
-  PT = pImpl->PointerTypes.get(PVT);
+  if (isPacked() != Other->isPacked() ||
+      getNumElements() != Other->getNumElements())
+    return false;
   
-  if (!PT) {
-    // Value not found.  Derive a new type!
-    pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace));
-  }
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "Derived new type: " << *PT << "\n");
-#endif
-  return PT;
+  return std::equal(element_begin(), element_end(), Other->element_begin());
 }
 
-const PointerType *Type::getPointerTo(unsigned addrs) const {
-  return PointerType::get(this, addrs);
-}
 
-bool PointerType::isValidElementType(const Type *ElemTy) {
-  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
-         !ElemTy->isMetadataTy();
+/// getTypeByName - Return the type with the specified name, or null if there
+/// is none by that name.
+StructType *Module::getTypeByName(StringRef Name) const {
+  StringMap<StructType*>::iterator I =
+    getContext().pImpl->NamedStructTypes.find(Name);
+  if (I != getContext().pImpl->NamedStructTypes.end())
+    return I->second;
+  return 0;
 }
 
 
 //===----------------------------------------------------------------------===//
-// Opaque Type Factory...
-//
+//                       CompositeType Implementation
+//===----------------------------------------------------------------------===//
 
-OpaqueType *OpaqueType::get(LLVMContext &C) {
-  OpaqueType *OT = new OpaqueType(C);       // All opaque types are distinct.
-  LLVMContextImpl *pImpl = C.pImpl;
-  pImpl->OpaqueTypes.insert(OT);
-  return OT;
+Type *CompositeType::getTypeAtIndex(const Value *V) const {
+  if (const StructType *STy = dyn_cast<StructType>(this)) {
+    unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+    assert(indexValid(Idx) && "Invalid structure index!");
+    return STy->getElementType(Idx);
+  }
+  
+  return cast<SequentialType>(this)->getElementType();
+}
+Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
+  if (const StructType *STy = dyn_cast<StructType>(this)) {
+    assert(indexValid(Idx) && "Invalid structure index!");
+    return STy->getElementType(Idx);
+  }
+  
+  return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+  if (const StructType *STy = dyn_cast<StructType>(this)) {
+    // Structure indexes require 32-bit integer constants.
+    if (V->getType()->isIntegerTy(32))
+      if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+        return CU->getZExtValue() < STy->getNumElements();
+    return false;
+  }
+  
+  // Sequential types can be indexed by any integer.
+  return V->getType()->isIntegerTy();
 }
 
+bool CompositeType::indexValid(unsigned Idx) const {
+  if (const StructType *STy = dyn_cast<StructType>(this))
+    return Idx < STy->getNumElements();
+  // Sequential types can be indexed by any integer.
+  return true;
+}
 
 
 //===----------------------------------------------------------------------===//
-//                     Derived Type Refinement Functions
+//                           ArrayType Implementation
 //===----------------------------------------------------------------------===//
 
-// addAbstractTypeUser - Notify an abstract type that there is a new user of
-// it.  This function is called primarily by the PATypeHandle class.
-void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
-  assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
-  AbstractTypeUsers.push_back(U);
+ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
+  : SequentialType(ArrayTyID, ElType) {
+  NumElements = NumEl;
 }
 
 
-// removeAbstractTypeUser - Notify an abstract type that a user of the class
-// no longer has a handle to the type.  This function is called primarily by
-// the PATypeHandle class.  When there are no users of the abstract type, it
-// is annihilated, because there is no way to get a reference to it ever again.
-//
-void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
-  
-  // Search from back to front because we will notify users from back to
-  // front.  Also, it is likely that there will be a stack like behavior to
-  // users that register and unregister users.
-  //
-  unsigned i;
-  for (i = AbstractTypeUsers.size(); AbstractTypeUsers[i-1] != U; --i)
-    assert(i != 0 && "AbstractTypeUser not in user list!");
-
-  --i;  // Convert to be in range 0 <= i < size()
-  assert(i < AbstractTypeUsers.size() && "Index out of range!");  // Wraparound?
-
-  AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i);
-
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "  remAbstractTypeUser[" << (void*)this << ", "
-               << *this << "][" << i << "] User = " << U << "\n");
-#endif
-
-  if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) {
-#ifdef DEBUG_MERGE_TYPES
-    DEBUG(dbgs() << "DELETEing unused abstract type: <" << *this
-                 << ">[" << (void*)this << "]" << "\n");
-#endif
+ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) {
+  Type *ElementType = const_cast<Type*>(elementType);
+  assert(isValidElementType(ElementType) && "Invalid type for array element!");
+    
+  LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+  ArrayType *&Entry = 
+    pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
   
-    this->destroy();
-  }
-}
-
-// refineAbstractTypeTo - This function is used when it is discovered
-// that the 'this' abstract type is actually equivalent to the NewType
-// specified. This causes all users of 'this' to switch to reference the more 
-// concrete type NewType and for 'this' to be deleted.  Only used for internal
-// callers.
-//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
-  assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
-  assert(this != NewType && "Can't refine to myself!");
-  assert(ForwardType == 0 && "This type has already been refined!");
-
-  LLVMContextImpl *pImpl = getContext().pImpl;
-
-  // The descriptions may be out of date.  Conservatively clear them all!
-  pImpl->AbstractTypeDescriptions.clear();
-
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " "
-               << *this << "] to [" << (void*)NewType << " "
-               << *NewType << "]!\n");
-#endif
-
-  // Make sure to put the type to be refined to into a holder so that if IT gets
-  // refined, that we will not continue using a dead reference...
-  //
-  PATypeHolder NewTy(NewType);
-  // Any PATypeHolders referring to this type will now automatically forward to
-  // the type we are resolved to.
-  ForwardType = NewType;
-  if (ForwardType->isAbstract())
-    ForwardType->addRef();
-
-  // Add a self use of the current type so that we don't delete ourself until
-  // after the function exits.
-  //
-  PATypeHolder CurrentTy(this);
-
-  // To make the situation simpler, we ask the subclass to remove this type from
-  // the type map, and to replace any type uses with uses of non-abstract types.
-  // This dramatically limits the amount of recursive type trouble we can find
-  // ourselves in.
-  dropAllTypeUses();
-
-  // Iterate over all of the uses of this type, invoking callback.  Each user
-  // should remove itself from our use list automatically.  We have to check to
-  // make sure that NewTy doesn't _become_ 'this'.  If it does, resolving types
-  // will not cause users to drop off of the use list.  If we resolve to ourself
-  // we succeed!
-  //
-  while (!AbstractTypeUsers.empty() && NewTy != this) {
-    AbstractTypeUser *User = AbstractTypeUsers.back();
-
-    unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
-#ifdef DEBUG_MERGE_TYPES
-    DEBUG(dbgs() << " REFINING user " << OldSize-1 << "[" << (void*)User
-                 << "] of abstract type [" << (void*)this << " "
-                 << *this << "] to [" << (void*)NewTy.get() << " "
-                 << *NewTy << "]!\n");
-#endif
-    User->refineAbstractType(this, NewTy);
-
-    assert(AbstractTypeUsers.size() != OldSize &&
-           "AbsTyUser did not remove self from user list!");
-  }
-
-  // If we were successful removing all users from the type, 'this' will be
-  // deleted when the last PATypeHolder is destroyed or updated from this type.
-  // This may occur on exit of this function, as the CurrentTy object is
-  // destroyed.
-}
-
-// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
-// the current type has transitioned from being abstract to being concrete.
-//
-void DerivedType::notifyUsesThatTypeBecameConcrete() {
-#ifdef DEBUG_MERGE_TYPES
-  DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
-#endif
-
-  unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
-  while (!AbstractTypeUsers.empty()) {
-    AbstractTypeUser *ATU = AbstractTypeUsers.back();
-    ATU->typeBecameConcrete(this);
-
-    assert(AbstractTypeUsers.size() < OldSize-- &&
-           "AbstractTypeUser did not remove itself from the use list!");
-  }
-}
-
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void FunctionType::refineAbstractType(const DerivedType *OldType,
-                                      const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->FunctionTypes.RefineAbstractType(this, OldType, NewType);
+  if (Entry == 0)
+    Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+  return Entry;
 }
 
-void FunctionType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->FunctionTypes.TypeBecameConcrete(this, AbsTy);
+bool ArrayType::isValidElementType(const Type *ElemTy) {
+  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+         !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
 }
 
+//===----------------------------------------------------------------------===//
+//                          VectorType Implementation
+//===----------------------------------------------------------------------===//
 
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void ArrayType::refineAbstractType(const DerivedType *OldType,
-                                   const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->ArrayTypes.RefineAbstractType(this, OldType, NewType);
+VectorType::VectorType(Type *ElType, unsigned NumEl)
+  : SequentialType(VectorTyID, ElType) {
+  NumElements = NumEl;
 }
 
-void ArrayType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->ArrayTypes.TypeBecameConcrete(this, AbsTy);
+VectorType *VectorType::get(const Type *elementType, unsigned NumElements) {
+  Type *ElementType = const_cast<Type*>(elementType);
+  assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+  assert(isValidElementType(ElementType) &&
+         "Elements of a VectorType must be a primitive type");
+  
+  LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+  VectorType *&Entry = ElementType->getContext().pImpl
+    ->VectorTypes[std::make_pair(ElementType, NumElements)];
+  
+  if (Entry == 0)
+    Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+  return Entry;
 }
 
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void VectorType::refineAbstractType(const DerivedType *OldType,
-                                   const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->VectorTypes.RefineAbstractType(this, OldType, NewType);
+bool VectorType::isValidElementType(const Type *ElemTy) {
+  return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
 }
 
-void VectorType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->VectorTypes.TypeBecameConcrete(this, AbsTy);
-}
+//===----------------------------------------------------------------------===//
+//                         PointerType Implementation
+//===----------------------------------------------------------------------===//
 
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void StructType::refineAbstractType(const DerivedType *OldType,
-                                    const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->StructTypes.RefineAbstractType(this, OldType, NewType);
-}
+PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) {
+  Type *EltTy = const_cast<Type*>(eltTy);
+  assert(EltTy && "Can't get a pointer to <null> type!");
+  assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+  
+  LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
+  
+  // Since AddressSpace #0 is the common case, we special case it.
+  PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
+     : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
 
-void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->StructTypes.TypeBecameConcrete(this, AbsTy);
+  if (Entry == 0)
+    Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+  return Entry;
 }
 
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void PointerType::refineAbstractType(const DerivedType *OldType,
-                                     const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->PointerTypes.RefineAbstractType(this, OldType, NewType);
-}
 
-void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->PointerTypes.TypeBecameConcrete(this, AbsTy);
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+  : SequentialType(PointerTyID, E) {
+  setSubclassData(AddrSpace);
 }
 
-bool SequentialType::indexValid(const Value *V) const {
-  if (V->getType()->isIntegerTy()) 
-    return true;
-  return false;
+PointerType *Type::getPointerTo(unsigned addrs) const {
+  return PointerType::get(this, addrs);
 }
 
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
-  T.print(OS);
-  return OS;
-}
+bool PointerType::isValidElementType(const Type *ElemTy) {
+  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+         !ElemTy->isMetadataTy();
 }
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
deleted file mode 100644
index d68a44bd6711..000000000000
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-//===-- TypeSymbolTable.cpp - Implement the TypeSymbolTable class ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TypeSymbolTable class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-using namespace llvm;
-
-#define DEBUG_SYMBOL_TABLE 0
-#define DEBUG_ABSTYPE 0
-
-TypeSymbolTable::~TypeSymbolTable() {
-  // Drop all abstract type references in the type plane...
-  for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
-    if (TI->second->isAbstract())   // If abstract, drop the reference...
-      cast<DerivedType>(TI->second)->removeAbstractTypeUser(this);
-  }
-}
-
-std::string TypeSymbolTable::getUniqueName(StringRef BaseName) const {
-  std::string TryName = BaseName;
-  
-  const_iterator End = tmap.end();
-
-  // See if the name exists
-  while (tmap.find(TryName) != End)            // Loop until we find a free
-    TryName = BaseName.str() + utostr(++LastUnique); // name in the symbol table
-  return TryName;
-}
-
-// lookup a type by name - returns null on failure
-Type* TypeSymbolTable::lookup(StringRef Name) const {
-  const_iterator TI = tmap.find(Name);
-  Type* result = 0;
-  if (TI != tmap.end())
-    result = const_cast<Type*>(TI->second);
-  return result;
-}
-
-// remove - Remove a type from the symbol table...
-Type* TypeSymbolTable::remove(iterator Entry) {
-  assert(Entry != tmap.end() && "Invalid entry to remove!");
-  const Type* Result = Entry->second;
-
-#if DEBUG_SYMBOL_TABLE
-  dump();
-  dbgs() << " Removing Value: " << Result->getDescription() << "\n";
-#endif
-
-  tmap.erase(Entry);
-  
-  // If we are removing an abstract type, remove the symbol table from it's use
-  // list...
-  if (Result->isAbstract()) {
-#if DEBUG_ABSTYPE
-    dbgs() << "Removing abstract type from symtab"
-           << Result->getDescription()
-           << "\n";
-#endif
-    cast<DerivedType>(Result)->removeAbstractTypeUser(this);
-  }
-
-  return const_cast<Type*>(Result);
-}
-
-
-// insert - Insert a type into the symbol table with the specified name...
-void TypeSymbolTable::insert(StringRef Name, const Type* T) {
-  assert(T && "Can't insert null type into symbol table!");
-
-  if (tmap.insert(std::make_pair(Name, T)).second) {
-    // Type inserted fine with no conflict.
-    
-#if DEBUG_SYMBOL_TABLE
-    dump();
-    dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
-#endif
-  } else {
-    // If there is a name conflict...
-    
-    // Check to see if there is a naming conflict.  If so, rename this type!
-    std::string UniqueName = Name;
-    if (lookup(Name))
-      UniqueName = getUniqueName(Name);
-    
-#if DEBUG_SYMBOL_TABLE
-    dump();
-    dbgs() << " Inserting type: " << UniqueName << ": "
-           << T->getDescription() << "\n";
-#endif
-
-    // Insert the tmap entry
-    tmap.insert(make_pair(UniqueName, T));
-  }
-  
-  // If we are adding an abstract type, add the symbol table to it's use list.
-  if (T->isAbstract()) {
-    cast<DerivedType>(T)->addAbstractTypeUser(this);
-#if DEBUG_ABSTYPE
-    dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n";
-#endif
-  }
-}
-
-// This function is called when one of the types in the type plane are refined
-void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
-                                         const Type *NewType) {
-  // Loop over all of the types in the symbol table, replacing any references
-  // to OldType with references to NewType.  Note that there may be multiple
-  // occurrences, and although we only need to remove one at a time, it's
-  // faster to remove them all in one pass.
-  //
-  for (iterator I = begin(), E = end(); I != E; ++I) {
-    // FIXME when Types aren't const.
-    if (I->second == const_cast<DerivedType *>(OldType)) {
-#if DEBUG_ABSTYPE
-      dbgs() << "Removing type " << OldType->getDescription() << "\n";
-#endif
-      OldType->removeAbstractTypeUser(this);
-
-      // TODO FIXME when types aren't const
-      I->second = const_cast<Type *>(NewType);
-      if (NewType->isAbstract()) {
-#if DEBUG_ABSTYPE
-        dbgs() << "Added type " << NewType->getDescription() << "\n";
-#endif
-        cast<DerivedType>(NewType)->addAbstractTypeUser(this);
-      }
-    }
-  }
-}
-
-
-// Handle situation where type becomes Concreate from Abstract
-void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
-  // Loop over all of the types in the symbol table, dropping any abstract
-  // type user entries for AbsTy which occur because there are names for the
-  // type.
-  for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
-    if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
-      AbsTy->removeAbstractTypeUser(this);
-}
-
-static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
-  dbgs() << "  '" << T.first << "' = ";
-  T.second->dump();
-  dbgs() << "\n";
-}
-
-void TypeSymbolTable::dump() const {
-  dbgs() << "TypeSymbolPlane: ";
-  for_each(tmap.begin(), tmap.end(), DumpTypes);
-}
-
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
deleted file mode 100644
index ad09478bbcfd..000000000000
--- a/lib/VMCore/TypesContext.h
+++ /dev/null
@@ -1,426 +0,0 @@
-//===-- TypesContext.h - Types-related Context Internals ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//  This file defines various helper methods and classes used by
-// LLVMContextImpl for creating and managing types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TYPESCONTEXT_H
-#define LLVM_TYPESCONTEXT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include <map>
-
-
-//===----------------------------------------------------------------------===//
-//                       Derived Type Factory Functions
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-/// getSubElementHash - Generate a hash value for all of the SubType's of this
-/// type.  The hash value is guaranteed to be zero if any of the subtypes are 
-/// an opaque type.  Otherwise we try to mix them in as well as possible, but do
-/// not look at the subtype's subtype's.
-static unsigned getSubElementHash(const Type *Ty) {
-  unsigned HashVal = 0;
-  for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
-       I != E; ++I) {
-    HashVal *= 32;
-    const Type *SubTy = I->get();
-    HashVal += SubTy->getTypeID();
-    switch (SubTy->getTypeID()) {
-    default: break;
-    case Type::OpaqueTyID: return 0;    // Opaque -> hash = 0 no matter what.
-    case Type::IntegerTyID:
-      HashVal ^= (cast<IntegerType>(SubTy)->getBitWidth() << 3);
-      break;
-    case Type::FunctionTyID:
-      HashVal ^= cast<FunctionType>(SubTy)->getNumParams()*2 + 
-                 cast<FunctionType>(SubTy)->isVarArg();
-      break;
-    case Type::ArrayTyID:
-      HashVal ^= cast<ArrayType>(SubTy)->getNumElements();
-      break;
-    case Type::VectorTyID:
-      HashVal ^= cast<VectorType>(SubTy)->getNumElements();
-      break;
-    case Type::StructTyID:
-      HashVal ^= cast<StructType>(SubTy)->getNumElements();
-      break;
-    case Type::PointerTyID:
-      HashVal ^= cast<PointerType>(SubTy)->getAddressSpace();
-      break;
-    }
-  }
-  return HashVal ? HashVal : 1;  // Do not return zero unless opaque subty.
-}
-
-//===----------------------------------------------------------------------===//
-// Integer Type Factory...
-//
-class IntegerValType {
-  uint32_t bits;
-public:
-  IntegerValType(uint32_t numbits) : bits(numbits) {}
-
-  static IntegerValType get(const IntegerType *Ty) {
-    return IntegerValType(Ty->getBitWidth());
-  }
-
-  static unsigned hashTypeStructure(const IntegerType *Ty) {
-    return (unsigned)Ty->getBitWidth();
-  }
-
-  inline bool operator<(const IntegerValType &IVT) const {
-    return bits < IVT.bits;
-  }
-};
-
-// PointerValType - Define a class to hold the key that goes into the TypeMap
-//
-class PointerValType {
-  const Type *ValTy;
-  unsigned AddressSpace;
-public:
-  PointerValType(const Type *val, unsigned as) : ValTy(val), AddressSpace(as) {}
-
-  static PointerValType get(const PointerType *PT) {
-    return PointerValType(PT->getElementType(), PT->getAddressSpace());
-  }
-
-  static unsigned hashTypeStructure(const PointerType *PT) {
-    return getSubElementHash(PT);
-  }
-
-  bool operator<(const PointerValType &MTV) const {
-    if (AddressSpace < MTV.AddressSpace) return true;
-    return AddressSpace == MTV.AddressSpace && ValTy < MTV.ValTy;
-  }
-};
-
-//===----------------------------------------------------------------------===//
-// Array Type Factory...
-//
-class ArrayValType {
-  const Type *ValTy;
-  uint64_t Size;
-public:
-  ArrayValType(const Type *val, uint64_t sz) : ValTy(val), Size(sz) {}
-
-  static ArrayValType get(const ArrayType *AT) {
-    return ArrayValType(AT->getElementType(), AT->getNumElements());
-  }
-
-  static unsigned hashTypeStructure(const ArrayType *AT) {
-    return (unsigned)AT->getNumElements();
-  }
-
-  inline bool operator<(const ArrayValType &MTV) const {
-    if (Size < MTV.Size) return true;
-    return Size == MTV.Size && ValTy < MTV.ValTy;
-  }
-};
-
-//===----------------------------------------------------------------------===//
-// Vector Type Factory...
-//
-class VectorValType {
-  const Type *ValTy;
-  unsigned Size;
-public:
-  VectorValType(const Type *val, int sz) : ValTy(val), Size(sz) {}
-
-  static VectorValType get(const VectorType *PT) {
-    return VectorValType(PT->getElementType(), PT->getNumElements());
-  }
-
-  static unsigned hashTypeStructure(const VectorType *PT) {
-    return PT->getNumElements();
-  }
-
-  inline bool operator<(const VectorValType &MTV) const {
-    if (Size < MTV.Size) return true;
-    return Size == MTV.Size && ValTy < MTV.ValTy;
-  }
-};
-
-// StructValType - Define a class to hold the key that goes into the TypeMap
-//
-class StructValType {
-  std::vector<const Type*> ElTypes;
-  bool packed;
-public:
-  StructValType(ArrayRef<const Type*> args, bool isPacked)
-    : ElTypes(args.vec()), packed(isPacked) {}
-
-  static StructValType get(const StructType *ST) {
-    std::vector<const Type *> ElTypes;
-    ElTypes.reserve(ST->getNumElements());
-    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
-      ElTypes.push_back(ST->getElementType(i));
-
-    return StructValType(ElTypes, ST->isPacked());
-  }
-
-  static unsigned hashTypeStructure(const StructType *ST) {
-    return ST->getNumElements();
-  }
-
-  inline bool operator<(const StructValType &STV) const {
-    if (ElTypes < STV.ElTypes) return true;
-    else if (ElTypes > STV.ElTypes) return false;
-    else return (int)packed < (int)STV.packed;
-  }
-};
-
-// FunctionValType - Define a class to hold the key that goes into the TypeMap
-//
-class FunctionValType {
-  const Type *RetTy;
-  std::vector<const Type*> ArgTypes;
-  bool isVarArg;
-public:
-  FunctionValType(const Type *ret, ArrayRef<const Type*> args, bool isVA)
-    : RetTy(ret), ArgTypes(args.vec()), isVarArg(isVA) {}
-
-  static FunctionValType get(const FunctionType *FT);
-
-  static unsigned hashTypeStructure(const FunctionType *FT) {
-    unsigned Result = FT->getNumParams()*2 + FT->isVarArg();
-    return Result;
-  }
-
-  inline bool operator<(const FunctionValType &MTV) const {
-    if (RetTy < MTV.RetTy) return true;
-    if (RetTy > MTV.RetTy) return false;
-    if (isVarArg < MTV.isVarArg) return true;
-    if (isVarArg > MTV.isVarArg) return false;
-    if (ArgTypes < MTV.ArgTypes) return true;
-    if (ArgTypes > MTV.ArgTypes) return false;
-    return false;
-  }
-};
-
-class TypeMapBase {
-protected:
-  /// TypesByHash - Keep track of types by their structure hash value.  Note
-  /// that we only keep track of types that have cycles through themselves in
-  /// this map.
-  ///
-  std::multimap<unsigned, PATypeHolder> TypesByHash;
-
-  ~TypeMapBase() {
-    // PATypeHolder won't destroy non-abstract types.
-    // We can't destroy them by simply iterating, because
-    // they may contain references to each-other.
-    for (std::multimap<unsigned, PATypeHolder>::iterator I
-         = TypesByHash.begin(), E = TypesByHash.end(); I != E; ++I) {
-      Type *Ty = const_cast<Type*>(I->second.Ty);
-      I->second.destroy();
-      // We can't invoke destroy or delete, because the type may
-      // contain references to already freed types.
-      // So we have to destruct the object the ugly way.
-      if (Ty) {
-        Ty->AbstractTypeUsers.clear();
-        static_cast<const Type*>(Ty)->Type::~Type();
-        operator delete(Ty);
-      }
-    }
-  }
-
-public:
-  void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) {
-    std::multimap<unsigned, PATypeHolder>::iterator I =
-      TypesByHash.lower_bound(Hash);
-    for (; I != TypesByHash.end() && I->first == Hash; ++I) {
-      if (I->second == Ty) {
-        TypesByHash.erase(I);
-        return;
-      }
-    }
-
-    // This must be do to an opaque type that was resolved.  Switch down to hash
-    // code of zero.
-    assert(Hash && "Didn't find type entry!");
-    RemoveFromTypesByHash(0, Ty);
-  }
-
-  /// TypeBecameConcrete - When Ty gets a notification that TheType just became
-  /// concrete, drop uses and make Ty non-abstract if we should.
-  void TypeBecameConcrete(DerivedType *Ty, const DerivedType *TheType) {
-    // If the element just became concrete, remove 'ty' from the abstract
-    // type user list for the type.  Do this for as many times as Ty uses
-    // OldType.
-    for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
-         I != E; ++I)
-      if (I->get() == TheType)
-        TheType->removeAbstractTypeUser(Ty);
-
-    // If the type is currently thought to be abstract, rescan all of our
-    // subtypes to see if the type has just become concrete!  Note that this
-    // may send out notifications to AbstractTypeUsers that types become
-    // concrete.
-    if (Ty->isAbstract())
-      Ty->PromoteAbstractToConcrete();
-  }
-};
-
-// TypeMap - Make sure that only one instance of a particular type may be
-// created on any given run of the compiler... note that this involves updating
-// our map if an abstract type gets refined somehow.
-//
-template<class ValType, class TypeClass>
-class TypeMap : public TypeMapBase {
-  std::map<ValType, PATypeHolder> Map;
-public:
-  typedef typename std::map<ValType, PATypeHolder>::iterator iterator;
-
-  inline TypeClass *get(const ValType &V) {
-    iterator I = Map.find(V);
-    return I != Map.end() ? cast<TypeClass>((Type*)I->second.get()) : 0;
-  }
-
-  inline void add(const ValType &V, TypeClass *Ty) {
-    Map.insert(std::make_pair(V, Ty));
-
-    // If this type has a cycle, remember it.
-    TypesByHash.insert(std::make_pair(ValType::hashTypeStructure(Ty), Ty));
-    print("add");
-  }
-  
-  /// RefineAbstractType - This method is called after we have merged a type
-  /// with another one.  We must now either merge the type away with
-  /// some other type or reinstall it in the map with it's new configuration.
-  void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
-                        const Type *NewType) {
-#ifdef DEBUG_MERGE_TYPES
-    DEBUG(dbgs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
-                 << "], " << (void*)NewType << " [" << *NewType << "])\n");
-#endif
-    
-    // Otherwise, we are changing one subelement type into another.  Clearly the
-    // OldType must have been abstract, making us abstract.
-    assert(Ty->isAbstract() && "Refining a non-abstract type!");
-    assert(OldType != NewType);
-
-    // Make a temporary type holder for the type so that it doesn't disappear on
-    // us when we erase the entry from the map.
-    PATypeHolder TyHolder = Ty;
-
-    // The old record is now out-of-date, because one of the children has been
-    // updated.  Remove the obsolete entry from the map.
-    unsigned NumErased = Map.erase(ValType::get(Ty));
-    assert(NumErased && "Element not found!"); (void)NumErased;
-
-    // Remember the structural hash for the type before we start hacking on it,
-    // in case we need it later.
-    unsigned OldTypeHash = ValType::hashTypeStructure(Ty);
-
-    // Find the type element we are refining... and change it now!
-    for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i)
-      if (Ty->ContainedTys[i] == OldType)
-        Ty->ContainedTys[i] = NewType;
-    unsigned NewTypeHash = ValType::hashTypeStructure(Ty);
-    
-    // If there are no cycles going through this node, we can do a simple,
-    // efficient lookup in the map, instead of an inefficient nasty linear
-    // lookup.
-    if (!TypeHasCycleThroughItself(Ty)) {
-      typename std::map<ValType, PATypeHolder>::iterator I;
-      bool Inserted;
-
-      tie(I, Inserted) = Map.insert(std::make_pair(ValType::get(Ty), Ty));
-      if (!Inserted) {
-        // Refined to a different type altogether?
-        RemoveFromTypesByHash(OldTypeHash, Ty);
-
-        // We already have this type in the table.  Get rid of the newly refined
-        // type.
-        TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-        Ty->refineAbstractTypeTo(NewTy);
-        return;
-      }
-    } else {
-      // Now we check to see if there is an existing entry in the table which is
-      // structurally identical to the newly refined type.  If so, this type
-      // gets refined to the pre-existing type.
-      //
-      std::multimap<unsigned, PATypeHolder>::iterator I, E, Entry;
-      tie(I, E) = TypesByHash.equal_range(NewTypeHash);
-      Entry = E;
-      for (; I != E; ++I) {
-        if (I->second == Ty) {
-          // Remember the position of the old type if we see it in our scan.
-          Entry = I;
-          continue;
-        }
-        
-        if (!TypesEqual(Ty, I->second))
-          continue;
-        
-        TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
-        // Remove the old entry form TypesByHash.  If the hash values differ
-        // now, remove it from the old place.  Otherwise, continue scanning
-        // within this hashcode to reduce work.
-        if (NewTypeHash != OldTypeHash) {
-          RemoveFromTypesByHash(OldTypeHash, Ty);
-        } else {
-          if (Entry == E) {
-            // Find the location of Ty in the TypesByHash structure if we
-            // haven't seen it already.
-            while (I->second != Ty) {
-              ++I;
-              assert(I != E && "Structure doesn't contain type??");
-            }
-            Entry = I;
-          }
-          TypesByHash.erase(Entry);
-        }
-        Ty->refineAbstractTypeTo(NewTy);
-        return;
-      }
-
-      // If there is no existing type of the same structure, we reinsert an
-      // updated record into the map.
-      Map.insert(std::make_pair(ValType::get(Ty), Ty));
-    }
-
-    // If the hash codes differ, update TypesByHash
-    if (NewTypeHash != OldTypeHash) {
-      RemoveFromTypesByHash(OldTypeHash, Ty);
-      TypesByHash.insert(std::make_pair(NewTypeHash, Ty));
-    }
-    
-    // If the type is currently thought to be abstract, rescan all of our
-    // subtypes to see if the type has just become concrete!  Note that this
-    // may send out notifications to AbstractTypeUsers that types become
-    // concrete.
-    if (Ty->isAbstract())
-      Ty->PromoteAbstractToConcrete();
-  }
-
-  void print(const char *Arg) const {
-#ifdef DEBUG_MERGE_TYPES
-    DEBUG(dbgs() << "TypeMap<>::" << Arg << " table contents:\n");
-    unsigned i = 0;
-    for (typename std::map<ValType, PATypeHolder>::const_iterator I
-           = Map.begin(), E = Map.end(); I != E; ++I)
-      DEBUG(dbgs() << " " << (++i) << ". " << (void*)I->second.get() << " "
-                   << *I->second.get() << "\n");
-#endif
-  }
-
-  void dump() const { print("dump output"); }
-};
-}
-
-#endif
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index 2258b8d985ae..359a1517ab79 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -135,11 +135,9 @@ void Use::zap(Use *Start, const Use *Stop, bool del) {
 
 User *Use::getUser() const {
   const Use *End = getImpliedUser();
-  const PointerIntPair<User*, 1, unsigned>&
-    ref(static_cast<const AugmentedUse*>(End - 1)->ref);
-  User *She = ref.getPointer();
-  return ref.getInt()
-    ? She
+  const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+  return ref->getInt()
+    ? ref->getPointer()
     : (User*)End;
 }
 
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index 2f4587debb66..f01fa349adfd 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -40,14 +40,12 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
 //===----------------------------------------------------------------------===//
 
 Use *User::allocHungoffUses(unsigned N) const {
-  Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
-                                                + sizeof(AugmentedUse)
-                                                - sizeof(Use)));
+  // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+  // the User.
+  size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+  Use *Begin = static_cast<Use*>(::operator new(size));
   Use *End = Begin + N;
-  PointerIntPair<User*, 1, unsigned>&
-    ref(static_cast<AugmentedUse&>(End[-1]).ref);
-  ref.setPointer(const_cast<User*>(this));
-  ref.setInt(1);
+  (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
   return Use::initTags(Begin, End);
 }
 
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 29f6a8094f0b..f1815e377edc 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -35,22 +35,21 @@ using namespace llvm;
 //                                Value Class
 //===----------------------------------------------------------------------===//
 
-static inline const Type *checkType(const Type *Ty) {
+static inline Type *checkType(const Type *Ty) {
   assert(Ty && "Value defined with a null type: Error!");
-  return Ty;
+  return const_cast<Type*>(Ty);
 }
 
 Value::Value(const Type *ty, unsigned scid)
   : SubclassID(scid), HasValueHandle(0),
     SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)),
     UseList(0), Name(0) {
+  // FIXME: Why isn't this in the subclass gunk??
   if (isa<CallInst>(this) || isa<InvokeInst>(this))
-    assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
-            ty->isOpaqueTy() || VTy->isStructTy()) &&
-           "invalid CallInst  type!");
+    assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
+           "invalid CallInst type!");
   else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
-    assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
-            ty->isOpaqueTy()) &&
+    assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
            "Cannot create non-first-class values except for constants!");
 }
 
@@ -281,17 +280,16 @@ void Value::takeName(Value *V) {
 }
 
 
-// uncheckedReplaceAllUsesWith - This is exactly the same as replaceAllUsesWith,
-// except that it doesn't have all of the asserts.  The asserts fail because we
-// are half-way done resolving types, which causes some types to exist as two
-// different Type*'s at the same time.  This is a sledgehammer to work around
-// this problem.
-//
-void Value::uncheckedReplaceAllUsesWith(Value *New) {
+void Value::replaceAllUsesWith(Value *New) {
+  assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+  assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+  assert(New->getType() == getType() &&
+         "replaceAllUses of value with new value of different type!");
+
   // Notify all ValueHandles (if present) that this value is going away.
   if (HasValueHandle)
     ValueHandleBase::ValueIsRAUWd(this, New);
-
+  
   while (!use_empty()) {
     Use &U = *UseList;
     // Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -302,18 +300,12 @@ void Value::uncheckedReplaceAllUsesWith(Value *New) {
         continue;
       }
     }
-
+    
     U.set(New);
   }
-}
-
-void Value::replaceAllUsesWith(Value *New) {
-  assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
-  assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
-  assert(New->getType() == getType() &&
-         "replaceAllUses of value with new value of different type!");
-
-  uncheckedReplaceAllUsesWith(New);
+  
+  if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+    BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
 }
 
 Value *Value::stripPointerCasts() {
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 254bf06439d9..f1c970361a50 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -25,7 +25,7 @@ ValueSymbolTable::~ValueSymbolTable() {
 #ifndef NDEBUG   // Only do this in -g mode...
   for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
     dbgs() << "Value still in symbol table! Type = '"
-           << VI->getValue()->getType()->getDescription() << "' Name = '"
+           << *VI->getValue()->getType() << "' Name = '"
            << VI->getKeyData() << "'\n";
   assert(vmap.empty() && "Values remain in symbol table!");
 #endif
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index c054ae46f23b..21a1f034446a 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -133,6 +133,7 @@ std::string EVT::getEVTString() const {
   case MVT::v2f64:   return "v2f64";
   case MVT::v4f64:   return "v4f64";
   case MVT::Metadata:return "Metadata";
+  case MVT::untyped: return "untyped";
   }
 }
 
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 139e03523019..b146b896cbfb 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -49,7 +49,6 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -109,54 +108,6 @@ INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
 static char &PreVerifyID = PreVerifier::ID;
 
 namespace {
-  class TypeSet : public AbstractTypeUser {
-  public:
-    TypeSet() {}
-
-    /// Insert a type into the set of types.
-    bool insert(const Type *Ty) {
-      if (!Types.insert(Ty))
-        return false;
-      if (Ty->isAbstract())
-        Ty->addAbstractTypeUser(this);
-      return true;
-    }
-
-    // Remove ourselves as abstract type listeners for any types that remain
-    // abstract when the TypeSet is destroyed.
-    ~TypeSet() {
-      for (SmallSetVector<const Type *, 16>::iterator I = Types.begin(),
-             E = Types.end(); I != E; ++I) {
-        const Type *Ty = *I;
-        if (Ty->isAbstract())
-          Ty->removeAbstractTypeUser(this);
-      }
-    }
-
-    // Abstract type user interface.
-
-    /// Remove types from the set when refined. Do not insert the type it was
-    /// refined to because that type hasn't been verified yet.
-    void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
-      Types.remove(OldTy);
-      OldTy->removeAbstractTypeUser(this);
-    }
-
-    /// Stop listening for changes to a type which is no longer abstract.
-    void typeBecameConcrete(const DerivedType *AbsTy) {
-      AbsTy->removeAbstractTypeUser(this);
-    }
-
-    void dump() const {}
-
-  private:
-    SmallSetVector<const Type *, 16> Types;
-
-    // Disallow copying.
-    TypeSet(const TypeSet &);
-    TypeSet &operator=(const TypeSet &);
-  };
-
   struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
     static char ID; // Pass ID, replacement for typeid
     bool Broken;          // Is this module found to be broken?
@@ -176,9 +127,6 @@ namespace {
     /// an instruction in the same block.
     SmallPtrSet<Instruction*, 16> InstsInThisBlock;
 
-    /// Types - keep track of the types that have been checked already.
-    TypeSet Types;
-
     /// MDNodes - keep track of the metadata nodes that have been checked
     /// already.
     SmallPtrSet<MDNode *, 32> MDNodes;
@@ -199,7 +147,6 @@ namespace {
     bool doInitialization(Module &M) {
       Mod = &M;
       Context = &M.getContext();
-      verifyTypeSymbolTable(M.getTypeSymbolTable());
 
       // If this is a real pass, in a pass manager, we must abort before
       // returning back to the pass manager, or else the pass manager may try to
@@ -285,7 +232,6 @@ namespace {
 
 
     // Verification methods...
-    void verifyTypeSymbolTable(TypeSymbolTable &ST);
     void visitGlobalValue(GlobalValue &GV);
     void visitGlobalVariable(GlobalVariable &GV);
     void visitGlobalAlias(GlobalAlias &GA);
@@ -345,7 +291,6 @@ namespace {
                               bool isReturnValue, const Value *V);
     void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
                              const Value *V);
-    void VerifyType(const Type *Ty);
 
     void WriteValue(const Value *V) {
       if (!V) return;
@@ -359,8 +304,7 @@ namespace {
 
     void WriteType(const Type *T) {
       if (!T) return;
-      MessagesStr << ' ';
-      WriteTypeSymbolic(MessagesStr, T, Mod);
+      MessagesStr << ' ' << *T;
     }
 
 
@@ -568,11 +512,6 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
   }
 }
 
-void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
-  for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I)
-    VerifyType(I->second);
-}
-
 // VerifyParameterAttrs - Check the given attributes for an argument or return
 // value of the specified type.  The value V is printed in error messages.
 void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
@@ -1139,9 +1078,6 @@ void Verifier::visitPHINode(PHINode &PN) {
   for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
     Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
             "PHI node operands are not the same type as the result!", &PN);
-    Assert1(isa<BasicBlock>(PN.getOperand(
-                PHINode::getOperandNumForIncomingBlock(i))),
-            "PHI node incoming block is not a BasicBlock!", &PN);
   }
 
   // All other PHI node constraints are checked in the visitBasicBlock method.
@@ -1195,11 +1131,11 @@ void Verifier::VerifyCallSite(CallSite CS) {
     }
 
   // Verify that there's no metadata unless it's a direct call to an intrinsic.
-  if (!CS.getCalledFunction() ||
+  if (CS.getCalledFunction() == 0 ||
       !CS.getCalledFunction()->getName().startswith("llvm.")) {
     for (FunctionType::param_iterator PI = FTy->param_begin(),
            PE = FTy->param_end(); PI != PE; ++PI)
-      Assert1(!PI->get()->isMetadataTy(),
+      Assert1(!(*PI)->isMetadataTy(),
               "Function has metadata parameter but isn't an intrinsic", I);
   }
 
@@ -1382,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
 
 void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
   Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
-                                           EVI.idx_begin(), EVI.idx_end()) ==
+                                           EVI.getIndices()) ==
           EVI.getType(),
           "Invalid ExtractValueInst operands!", &EVI);
   
@@ -1391,7 +1327,7 @@ void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
 
 void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
   Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
-                                           IVI.idx_begin(), IVI.idx_end()) ==
+                                           IVI.getIndices()) ==
           IVI.getOperand(1)->getType(),
           "Invalid InsertValueInst operands!", &IVI);
   
@@ -1482,8 +1418,10 @@ void Verifier::visitInstruction(Instruction &I) {
         // PHI nodes differ from other nodes because they actually "use" the
         // value in the predecessor basic blocks they correspond to.
         BasicBlock *UseBlock = BB;
-        if (isa<PHINode>(I))
-          UseBlock = dyn_cast<BasicBlock>(I.getOperand(i+1));
+        if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+          unsigned j = PHINode::getIncomingValueNumForOperand(i);
+          UseBlock = PN->getIncomingBlock(j);
+        }
         Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
                 Op, &I);
 
@@ -1515,10 +1453,11 @@ void Verifier::visitInstruction(Instruction &I) {
                 return;
               }
         }
-      } else if (isa<PHINode>(I)) {
+      } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
         // PHI nodes are more difficult than other nodes because they actually
         // "use" the value in the predecessor basic blocks they correspond to.
-        BasicBlock *PredBB = dyn_cast<BasicBlock>(I.getOperand(i+1));
+        unsigned j = PHINode::getIncomingValueNumForOperand(i);
+        BasicBlock *PredBB = PN->getIncomingBlock(j);
         Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
                            !DT->isReachableFromEntry(PredBB)),
                 "Instruction does not dominate all uses!", Op, &I);
@@ -1542,69 +1481,6 @@ void Verifier::visitInstruction(Instruction &I) {
     }
   }
   InstsInThisBlock.insert(&I);
-
-  VerifyType(I.getType());
-}
-
-/// VerifyType - Verify that a type is well formed.
-///
-void Verifier::VerifyType(const Type *Ty) {
-  if (!Types.insert(Ty)) return;
-
-  Assert1(Context == &Ty->getContext(),
-          "Type context does not match Module context!", Ty);
-
-  switch (Ty->getTypeID()) {
-  case Type::FunctionTyID: {
-    const FunctionType *FTy = cast<FunctionType>(Ty);
-
-    const Type *RetTy = FTy->getReturnType();
-    Assert2(FunctionType::isValidReturnType(RetTy),
-            "Function type with invalid return type", RetTy, FTy);
-    VerifyType(RetTy);
-
-    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
-      const Type *ElTy = FTy->getParamType(i);
-      Assert2(FunctionType::isValidArgumentType(ElTy),
-              "Function type with invalid parameter type", ElTy, FTy);
-      VerifyType(ElTy);
-    }
-    break;
-  }
-  case Type::StructTyID: {
-    const StructType *STy = cast<StructType>(Ty);
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      const Type *ElTy = STy->getElementType(i);
-      Assert2(StructType::isValidElementType(ElTy),
-              "Structure type with invalid element type", ElTy, STy);
-      VerifyType(ElTy);
-    }
-    break;
-  }
-  case Type::ArrayTyID: {
-    const ArrayType *ATy = cast<ArrayType>(Ty);
-    Assert1(ArrayType::isValidElementType(ATy->getElementType()),
-            "Array type with invalid element type", ATy);
-    VerifyType(ATy->getElementType());
-    break;
-  }
-  case Type::PointerTyID: {
-    const PointerType *PTy = cast<PointerType>(Ty);
-    Assert1(PointerType::isValidElementType(PTy->getElementType()),
-            "Pointer type with invalid element type", PTy);
-    VerifyType(PTy->getElementType());
-    break;
-  }
-  case Type::VectorTyID: {
-    const VectorType *VTy = cast<VectorType>(Ty);
-    Assert1(VectorType::isValidElementType(VTy->getElementType()),
-            "Vector type with invalid element type", VTy);
-    VerifyType(VTy->getElementType());
-    break;
-  }
-  default:
-    break;
-  }
 }
 
 // Flags used by TableGen to mark intrinsic parameters with the